diff --git a/.asf.yaml b/.asf.yaml index 4bd5191a7a6..2c66ce5be63 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -15,6 +15,10 @@ # specific language governing permissions and limitations # under the License. +github: + description: "Apache Arrow is a multi-language toolbox for accelerated data interchange and in-memory processing" + homepage: https://arrow.apache.org/ + notifications: commits: commits@arrow.apache.org issues: github@arrow.apache.org diff --git a/.dockerignore b/.dockerignore index eb71138c679..a369d7d59a6 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,7 +27,7 @@ # include explicitly !ci/** !c_glib/Gemfile -!dev/archery/requirements*.txt +!dev/archery/setup.py !python/requirements*.txt !python/manylinux1/** !python/manylinux2010/** diff --git a/.env b/.env index cd6b57e004a..0af36084bd7 100644 --- a/.env +++ b/.env @@ -42,12 +42,11 @@ ULIMIT_CORE=-1 REPO=apache/arrow-dev CUDA=9.1 DEBIAN=10 -UBUNTU=18.04 +UBUNTU=20.04 FEDORA=33 PYTHON=3.6 -LLVM=11 +LLVM=12 CLANG_TOOLS=8 -RUST=nightly-2020-11-24 GO=1.15 NODE=14 MAVEN=3.5.4 @@ -60,15 +59,17 @@ KARTOTHEK=latest HDFS=3.2.1 SPARK=master DOTNET=3.1 -R=4.0 +R=4.1 ARROW_R_DEV=TRUE +GCC_VERSION="" # These correspond to images on Docker Hub that contain R, e.g. 
rhub/ubuntu-gcc-release:latest R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest +TZ=UTC # -1 does not attempt to install a devtoolset version, any positive integer will install devtoolset-n DEVTOOLSET_VERSION=-1 # Used for the manylinux and windows wheels, please update the crossbow configuration on update: # https://github.com/ursacomputing/crossbow/blob/master/.github/workflows/cache_vcpkg.yml -VCPKG=fced4bef1606260f110d74de1ae1975c2b9ac549 +VCPKG="2021.04.30" diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 761e0459543..66cd04a37c9 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -31,21 +31,25 @@ on: - 'dev/tasks/**' - 'docker-compose.yml' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + jobs: test: if: ${{ !contains(github.event.pull_request.title, 'WIP') }} name: Archery Unittests and Crossbow Check Config runs-on: ubuntu-latest + timeout-minutes: 15 steps: - name: Checkout Arrow uses: actions/checkout@v2 with: fetch-depth: 0 - name: Git Fixup - if: ${{ github.event_name == 'pull_request' }} shell: bash - run: git branch master origin/master + run: git branch master origin/master || true - name: Free Up Disk Space run: ci/scripts/util_cleanup.sh - name: Setup Python @@ -58,7 +62,7 @@ jobs: working-directory: dev/archery run: pytest -v archery - name: Archery Docker Validation - run: archery docker + run: archery docker check-config - name: Crossbow Check Config working-directory: dev/tasks run: archery crossbow check-config diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml deleted file mode 100644 index de980eb6d05..00000000000 --- a/.github/workflows/cancel.yml +++ /dev/null @@ -1,123 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Cancel stale runs - -on: - workflow_run: - # The name of another workflow (whichever one) that always runs on PRs - workflows: ['Dev'] - types: ['requested'] - -jobs: - cancel-stale-workflow-runs: - name: "Cancel stale workflow runs" - runs-on: ubuntu-latest - steps: - # Unfortunately, we need to define a separate cancellation step for - # each workflow where we want to cancel stale runs. 
- - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale C++ runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: cpp.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale C# runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: csharp.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Dev runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: dev.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Go runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: go.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Integration runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: integration.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Java JNI runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: java_jni.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Java runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: java.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale JS runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: js.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Julia runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: julia.yml - skipEventTypes: '["push", 
"schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Python runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: python.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale R runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: r.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Ruby runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: ruby.yml - skipEventTypes: '["push", "schedule"]' - - uses: potiuk/cancel-workflow-runs@master - name: "Cancel stale Rust runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - workflowFileName: rust.yml - skipEventTypes: '["push", "schedule"]' diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 9e103003eee..35d889152fb 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -68,6 +68,7 @@ jobs: } if changed '^r/.*\.R$'; then echo "R_DOCS=true" >> $GITHUB_ENV + echo "R_CODE=true" >> $GITHUB_ENV fi if changed 'cmake' || changed 'CMake'; then echo "CMAKE_FORMAT=true" >> $GITHUB_ENV @@ -78,6 +79,16 @@ jobs: if changed '^r/src'; then echo "CLANG_FORMAT_R=true" >> $GITHUB_ENV fi + - name: Ensure clang-format has the appropriate version + if: env.CMAKE_FORMAT == 'true' || + env.CLANG_FORMAT_CPP == 'true' || + env.CLANG_FORMAT_R == 'true' || + endsWith(github.event.comment.body, 'everything') + run: | + set -e + . 
.env # To get the clang version we use + sudo apt update + sudo apt install -y clang-format-${CLANG_TOOLS} - name: Run cmake_format if: env.CMAKE_FORMAT == 'true' || endsWith(github.event.comment.body, 'everything') run: | @@ -103,15 +114,30 @@ jobs: --exclude_glob=cpp/build-support/lint_exclusions.txt \ --source_dir=r/src --quiet --fix - uses: r-lib/actions/setup-r@v1 - if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything') + if: env.R_DOCS == 'true' || env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything') - name: Update R docs if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything') shell: Rscript {0} run: | source("ci/etc/rprofile") install.packages(c("remotes", "roxygen2")) + # We currently need dev roxygen2 (> 7.1.1) until they release + remotes::install_github("r-lib/roxygen2") remotes::install_deps("r") roxygen2::roxygenize("r") + - name: Style R code + if: env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything') + shell: Rscript {0} + run: | + changed_files <- system("git diff --name-only HEAD..upstream/master 2>&1", intern = TRUE) + # only grab the .R files under r/ + changed_files <- grep('^r/.*\\.R$', changed_files, value = TRUE) + # remove latin1 which is unstylable due to encoding and codegen.R which is unique + changed_files <- changed_files[!changed_files %in% file.path("r", source("r/.styler_excludes.R")$value)] + source("ci/etc/rprofile") + install.packages(c("remotes", "styler")) + remotes::install_deps("r") + styler::style_file(changed_files) - name: Commit results run: | git config user.name "$(git log -1 --pretty=format:%an)" diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 5f25deb4512..086f45d6fee 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -37,6 +37,10 @@ on: - 'cpp/**' - 'format/Flight.proto' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + 
env: ARROW_ENABLE_TIMING_TESTS: OFF DOCKER_VOLUME_PREFIX: ".docker/" @@ -49,6 +53,7 @@ jobs: name: ${{ matrix.title }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 45 strategy: fail-fast: false matrix: @@ -59,7 +64,7 @@ jobs: - image: conda-cpp title: AMD64 Conda C++ - image: ubuntu-cpp-sanitizer - title: AMD64 Ubuntu 18.04 C++ ASAN UBSAN + title: AMD64 Ubuntu 20.04 C++ ASAN UBSAN steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -91,84 +96,11 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} - docker-arm: - # NOTE: this job is specific for self-hosted runners - # CACHING: don't use the cache plugin because of various permission - # issues and keep the cached docker volumes permanently on the - # host - # PYTHON: no distributions are built for arm machines by the github - # actions team, so python>3.6 must be preinstalled on the self - # hosted machines - name: ${{ matrix.title }} - runs-on: ${{ matrix.runner }} - # TODO(kszucs): re-enable once the self-hosted workers are properly - # registered to github - if: false && github.event_name == 'push' - defaults: - # To use certain environment variables set by .bashrc, an interactive - # bash shell must be used - run: - shell: bash -i {0} - strategy: - fail-fast: false - matrix: - name: - - arm32v7-debian-10-cpp - - arm64v8-ubuntu-20.04-cpp - include: - - name: arm32v7-debian-10-cpp - debian: 10 - title: ARM32v7 Debian 10 C++ - image: | - -e CPP_MAKE_PARALLELISM=2 \ - -e CXXFLAGS=-Wno-psabi \ - -e ARROW_PARQUET=OFF \ - -e ARROW_FLIGHT=OFF \ - -e ARROW_GANDIVA=OFF \ - -e ARROW_ORC=OFF \ - -e CMAKE_ARGS=-DARROW_CPU_FLAG=armv7 \ - debian-cpp - arch: 'arm32v7' - runner: [self-hosted, linux, ARM] - - name: arm64v8-ubuntu-20.04-cpp - ubuntu: 20.04 - title: ARM64v8 Ubuntu 20.04 C++ - image: | - -e CPP_MAKE_PARALLELISM=1 \ - -e ARROW_PARQUET=OFF \ - ubuntu-cpp - arch: 'arm64v8' - runner: [self-hosted, linux, ARM64] - env: - # the 
defaults here should correspond to the values in .env - ARCH: ${{ matrix.arch || 'arm64v8' }} - DEBIAN: ${{ matrix.debian || 10 }} - FEDORA: ${{ matrix.fedora || 32 }} - UBUNTU: ${{ matrix.ubuntu || 18.04 }} - LLVM: 8 - steps: - - name: Checkout Arrow - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Fetch Submodules and Tags - run: ci/scripts/util_checkout.sh - - name: Setup Archery - run: pip install -U -e dev/archery[docker] - - name: Execute Docker Build - # parallelism is reduced because the ARM builders are low on memory - run: | - ulimit -c unlimited - archery docker run ${{ matrix.image }} - - name: Docker Push - if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' - continue-on-error: true - run: archery docker push ${{ matrix.image }} - build-example: name: C++ Minimal Build Example runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 45 strategy: fail-fast: false steps: @@ -185,6 +117,7 @@ jobs: name: AMD64 MacOS 10.15 C++ runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 45 strategy: fail-fast: false env: @@ -219,7 +152,6 @@ jobs: run: | rm -f /usr/local/bin/2to3 brew update --preinstall - brew unlink gcc@8 gcc@9 brew bundle --file=cpp/Brewfile - name: Build shell: bash @@ -236,6 +168,7 @@ jobs: name: AMD64 ${{ matrix.name }} C++ runs-on: ${{ matrix.os }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 45 strategy: fail-fast: false matrix: @@ -305,6 +238,7 @@ jobs: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++ runs-on: windows-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 45 strategy: fail-fast: false matrix: diff --git a/.github/workflows/cpp_cron.yml b/.github/workflows/cpp_cron.yml index c229ad93be3..c031e5961cb 100644 --- a/.github/workflows/cpp_cron.yml +++ b/.github/workflows/cpp_cron.yml @@ -36,76 +36,12 @@ env: 
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} jobs: - docker: - name: ${{ matrix.title }} - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }} - strategy: - fail-fast: false - matrix: - name: - - amd64-debian-10-cpp - - amd64-fedora-33-cpp - - amd64-ubuntu-16.04-cpp - - amd64-ubuntu-18.04-cpp - include: - - name: amd64-debian-10-cpp - image: debian-cpp - title: AMD64 Debian 10 C++ - debian: 10 - - name: amd64-fedora-33-cpp - image: fedora-cpp - title: AMD64 Fedora 33 C++ - fedora: 33 - - name: amd64-ubuntu-16.04-cpp - image: ubuntu-cpp - title: AMD64 Ubuntu 16.04 C++ - ubuntu: 16.04 - - name: amd64-ubuntu-18.04-cpp - image: ubuntu-cpp - title: AMD64 Ubuntu 18.04 C++ - ubuntu: 18.04 - env: - # the defaults here should correspond to the values in .env - ARCH: 'amd64' - DEBIAN: ${{ matrix.debian || 10 }} - FEDORA: ${{ matrix.fedora || 33 }} - UBUNTU: ${{ matrix.ubuntu || 18.04 }} - steps: - - name: Checkout Arrow - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Fetch Submodules and Tags - run: ci/scripts/util_checkout.sh - - name: Free Up Disk Space - run: ci/scripts/util_cleanup.sh - - name: Cache Docker Volumes - uses: actions/cache@v2 - with: - path: .docker - key: ${{ matrix.name }}-${{ hashFiles('cpp/**') }} - restore-keys: ${{ matrix.name }}- - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited - archery docker run ${{ matrix.image }} - - name: Docker Push - if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' - continue-on-error: true - run: archery docker push ${{ matrix.image }} oss-fuzz: name: OSS-Fuzz build check runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 
'apache/arrow' }} + timeout-minutes: 60 strategy: fail-fast: false matrix: diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 03a297bb914..b339b8f4655 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -29,12 +29,17 @@ on: - 'ci/scripts/csharp_*' - 'csharp/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + jobs: ubuntu: name: AMD64 Ubuntu 18.04 C# ${{ matrix.dotnet }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: @@ -65,6 +70,7 @@ jobs: name: AMD64 Windows 2019 18.04 C# ${{ matrix.dotnet }} runs-on: windows-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: @@ -94,6 +100,7 @@ jobs: name: AMD64 MacOS 10.15 C# ${{ matrix.dotnet }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 37016efcbfe..9ef46c31fa3 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -22,6 +22,10 @@ on: push: pull_request: +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} @@ -29,7 +33,7 @@ env: jobs: lint: - name: Lint C++, Python, R, Rust, Docker, RAT + name: Lint C++, Python, R, Docker, RAT runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} steps: @@ -51,7 +55,7 @@ jobs: run: | sudo sysctl -w kernel.core_pattern="core.%e.%p" ulimit -c unlimited - archery docker run ubuntu-lint + archery docker run -e GITHUB_ACTIONS=true ubuntu-lint - name: Docker Push if: success() && github.event_name == 'push' && 
github.repository == 'apache/arrow' continue-on-error: true @@ -79,13 +83,13 @@ jobs: with: python-version: '3.6' - name: Install Ruby - uses: actions/setup-ruby@v1 + uses: ruby/setup-ruby@v1 with: ruby-version: '2.6' - name: Install Dependencies shell: bash run: | - pip install cython setuptools pytest jira + pip install cython setuptools six pytest jira - name: Run Release Test shell: bash run: | diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 7b92b897051..5f3acd7bebf 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -28,6 +28,9 @@ on: - edited - synchronize +# NOTE: not using the "cancel-in-progress" feature here as the group key +# does not have enough information for linking it to a particular PR + jobs: process: name: Process diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 098e1bad7f4..8860d91f813 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -36,6 +36,9 @@ lang-js: lang-julia: - julia/**/* +lang-matlab: + - matlab/**/* + lang-python: - python/**/* @@ -45,19 +48,9 @@ lang-R: lang-ruby: - ruby/**/* -lang-rust: - - rust/**/* - -datafusion: - - rust/datafusion/**/* - -ballista: - - rust/ballista/**/* - flight: - cpp/src/arrow/flight/**/* - r/R/flight.* - - rust/arrow-flight/**/* - python/pyarrow/*flight.* gandiva: @@ -71,4 +64,3 @@ parquet: - cpp/src/parquet/**/* - r/R/parquet.* - ruby/red-parquet/**/* - - rust/parquet*/**/* diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 574795f5e9b..3c9100c20b7 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -32,6 +32,10 @@ on: - 'ci/scripts/go_*' - 'go/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} @@ -42,6 +46,7 @@ jobs: name: AMD64 Debian 
10 Go ${{ matrix.go }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: @@ -74,6 +79,7 @@ jobs: name: AMD64 Windows 2019 Go ${{ matrix.go }} runs-on: windows-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: @@ -101,6 +107,7 @@ jobs: name: AMD64 MacOS 10.15 Go ${{ matrix.go }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 strategy: fail-fast: false matrix: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 20112553ea2..7a4deb8e3ea 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -29,7 +29,6 @@ on: - 'cpp/**' - 'java/**' - 'format/**' - - 'rust/**' pull_request: paths: - '.github/workflows/integration.yml' @@ -41,7 +40,10 @@ on: - 'cpp/**' - 'java/**' - 'format/**' - - 'rust/**' + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true env: DOCKER_VOLUME_PREFIX: ".docker/" @@ -54,6 +56,7 @@ jobs: name: AMD64 Conda Integration Test runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -61,6 +64,11 @@ jobs: fetch-depth: 0 - name: Fetch Submodules and Tags run: ci/scripts/util_checkout.sh + - name: Checkout Arrow Rust + uses: actions/checkout@v2 + with: + repository: apache/arrow-rs + path: rust - name: Free Up Disk Space run: ci/scripts/util_cleanup.sh - name: Cache Docker Volumes @@ -76,7 +84,7 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build - run: archery docker run conda-integration + run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 
'apache/arrow' continue-on-error: true diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 7f6f29f0f44..72f4df7e36e 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -35,6 +35,10 @@ on: - 'format/Flight.proto' - 'java/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: DOCKER_VOLUME_PREFIX: ".docker/" ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -46,6 +50,7 @@ jobs: name: AMD64 Debian 9 Java JDK ${{ matrix.jdk }} Maven ${{ matrix.maven }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -88,6 +93,7 @@ jobs: name: AMD64 MacOS 10.15 Java JDK ${{ matrix.jdk }} runs-on: macos-latest if: github.event_name == 'push' + timeout-minutes: 30 strategy: fail-fast: false matrix: diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 5f25e8c053d..48351f3c22a 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -35,6 +35,10 @@ on: - 'cpp/**' - 'java/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: DOCKER_VOLUME_PREFIX: ".docker/" ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -46,14 +50,7 @@ jobs: name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset) runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - strategy: - fail-fast: false - matrix: - jdk: [8] - maven: [3.5.2] - env: - JDK: ${{ matrix.jdk }} - MAVEN: ${{ matrix.maven }} + timeout-minutes: 90 steps: - name: Checkout Arrow uses: actions/checkout@v2 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 354c45c60d3..95414909d39 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -31,6 +31,10 @@ on: - 'ci/scripts/js_*' - 'js/**' +concurrency: + group: ${{ github.repository }}-${{ 
github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} @@ -41,6 +45,7 @@ jobs: name: AMD64 Debian 10 NodeJS 14 runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -70,6 +75,7 @@ jobs: name: AMD64 MacOS 10.15 NodeJS ${{ matrix.node }} runs-on: macos-latest if: github.event_name == 'push' + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -93,30 +99,26 @@ jobs: shell: bash run: ci/scripts/js_test.sh $(pwd) - # TODO(kszucs): the windows build fails with platform specific npm error - # windows: - # name: AMD64 Windows 2019 NodeJS ${{ matrix.node }} - # runs-on: windows-latest - # if: github.event_name == 'push' - # strategy: - # fail-fast: false - # matrix: - # node: [14] - # steps: - # - name: Checkout Arrow - # uses: actions/checkout@v1 - # with: - # submodules: true - # - name: Install NodeJS - # uses: actions/setup-node@v1 - # with: - # node-version: ${{ matrix.node }} - # - name: Install Platform Dependencies - # shell: bash - # run: yarn add -g cross-env - # - name: Build - # shell: bash - # run: ci/scripts/js_build.sh $(pwd) - # - name: Test - # shell: bash - # run: ci/scripts/js_test.sh $(pwd) + windows: + name: AMD64 Windows 2019 NodeJS ${{ matrix.node }} + runs-on: windows-latest + if: github.event_name == 'push' + strategy: + fail-fast: false + matrix: + node: [14] + steps: + - name: Checkout Arrow + uses: actions/checkout@v1 + with: + submodules: true + - name: Install NodeJS + uses: actions/setup-node@v1 + with: + node-version: ${{ matrix.node }} + - name: Build + shell: bash + run: ci/scripts/js_build.sh $(pwd) + - name: Test + shell: bash + run: ci/scripts/js_test.sh $(pwd) diff --git a/.github/workflows/julia.yml b/.github/workflows/julia.yml index 64ea6c947a1..226ec3e6ad0 100644 --- a/.github/workflows/julia.yml 
+++ b/.github/workflows/julia.yml @@ -26,12 +26,17 @@ on: - '.github/workflows/julia.yml' - 'julia/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + jobs: test: name: AMD64 ${{ matrix.os }} Julia ${{ matrix.version }} env: JULIA_NUM_THREADS: 2 runs-on: ${{ matrix.os }} + timeout-minutes: 30 strategy: fail-fast: false matrix: diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 9062e93e665..59b14dc3287 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -31,6 +31,10 @@ on: - 'cpp/**' - 'python/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: DOCKER_VOLUME_PREFIX: ".docker/" ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -42,6 +46,7 @@ jobs: name: ${{ matrix.title }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -61,6 +66,7 @@ jobs: title: AMD64 Conda Python 3.6 Pandas 0.23 python: 3.6 pandas: 0.23 + numpy: 1.16 - name: conda-python-3.7-pandas-latest cache: conda-python-3.7 image: conda-python-pandas @@ -107,6 +113,7 @@ jobs: name: AMD64 MacOS 10.15 Python 3 runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 env: ARROW_HOME: /usr/local ARROW_DATASET: ON @@ -124,6 +131,7 @@ jobs: ARROW_WITH_BROTLI: ON ARROW_BUILD_TESTS: OFF CMAKE_ARGS: "-DPython3_EXECUTABLE=/usr/local/bin/python3" + PYARROW_TEST_LARGE_MEMORY: ON steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -137,7 +145,6 @@ jobs: run: | rm -f /usr/local/bin/2to3 brew update --preinstall - brew unlink gcc@8 gcc@9 brew bundle --file=cpp/Brewfile brew install coreutils python3 -mpip install \ diff --git a/.github/workflows/python_cron.yml b/.github/workflows/python_cron.yml deleted file mode 100644 index 7a4401af1c3..00000000000 --- 
a/.github/workflows/python_cron.yml +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Python Cron - -on: - push: - paths: - - '.github/workflows/python_cron.yml' - pull_request: - paths: - - '.github/workflows/python_cron.yml' - schedule: - - cron: | - 0 */12 * * * - -env: - DOCKER_VOLUME_PREFIX: ".docker/" - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - -jobs: - - docker: - name: ${{ matrix.title }} - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }} - strategy: - fail-fast: false - matrix: - name: - - debian-10-python-3 - - fedora-33-python-3 - - ubuntu-18.04-python-3 - - conda-python-3.7-dask-latest - - conda-python-3.7-turbodbc-latest - - conda-python-3.7-kartothek-latest - - conda-python-3.7-pandas-0.24 - - conda-python-3.7-pandas-master - - conda-python-3.7-hdfs-2.9.2 - include: - - name: debian-10-python-3 - cache: debian-10-python-3 - image: debian-python - title: AMD64 Debian 10 Python 3 - debian: 10 - - name: fedora-33-python-3 - cache: fedora-33-python-3 - image: fedora-python - title: AMD64 Fedora 33 Python 3 - fedora: 33 - - name: 
ubuntu-18.04-python-3 - cache: ubuntu-18.04-python-3 - image: ubuntu-python - title: AMD64 Ubuntu 18.04 Python 3 - ubuntu: 18.04 - - name: conda-python-3.7-dask-latest - cache: conda-python-3.7 - image: conda-python-dask - title: AMD64 Conda Python 3.7 Dask latest - dask: latest - - name: conda-python-3.7-turbodbc-latest - cache: conda-python-3.7 - image: conda-python-turbodbc - title: AMD64 Conda Python 3.7 Turbodbc latest - turbodbc: latest - - name: conda-python-3.7-kartothek-latest - cache: conda-python-3.7 - image: conda-python-kartothek - title: AMD64 Conda Python 3.7 Kartothek latest - kartothek: latest - - name: conda-python-3.7-pandas-0.24 - cache: conda-python-3.7 - image: conda-python-pandas - title: AMD64 Conda Python 3.7 Pandas 0.24 - pandas: 0.24 - - name: conda-python-3.7-pandas-master - cache: conda-python-3.7 - image: --no-leaf-cache conda-python-pandas - title: AMD64 Conda Python 3.7 Pandas master - pandas: master - - name: conda-python-3.7-hdfs-2.9.2 - cache: conda-python-3.7 - image: conda-python-hdfs - title: AMD64 Conda Python 3.7 HDFS 2.9.2 - hdfs: 2.9.2 - env: - # the defaults here should correspond to the values in .env - DEBIAN: ${{ matrix.debian || 10 }} - FEDORA: ${{ matrix.fedora || 33 }} - UBUNTU: ${{ matrix.ubuntu || 18.04 }} - PYTHON: ${{ matrix.python || 3.7 }} - HDFS: ${{ matrix.hdfs || '2.9.2' }} - DASK: ${{ matrix.dask || 'latest' }} - TURBODBC: ${{ matrix.turbodbc || 'latest' }} - PANDAS: ${{ matrix.pandas || 'latest' }} - KARTOTHEK: ${{ matrix.kartothek || 'latest' }} - steps: - - name: Checkout Arrow - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Fetch Submodules and Tags - run: ci/scripts/util_checkout.sh - - name: Free Up Disk Space - run: ci/scripts/util_cleanup.sh - - name: Cache Docker Volumes - uses: actions/cache@v2 - with: - path: .docker - key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} - restore-keys: ${{ matrix.cache }}- - - name: Setup Python - uses: actions/setup-python@v1 - with: - 
python-version: 3.8 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited - archery docker run ${{ matrix.image }} - - name: Docker Push - if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' - continue-on-error: true - run: archery docker push ${{ matrix.image }} diff --git a/.github/workflows/r-without-arrow.yml b/.github/workflows/r-without-arrow.yml new file mode 100644 index 00000000000..309c6ece5d0 --- /dev/null +++ b/.github/workflows/r-without-arrow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: R without Arrow + +on: + push: + paths: + - ".github/workflows/r-without-arrow.yml" + - "r/src/**" + pull_request: + paths: + - ".github/workflows/r-without-arrow.yml" + - "r/src/**" + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + +env: + DOCKER_VOLUME_PREFIX: ".docker/" + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + +jobs: + bundled: + name: "R package without arrow" + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 + strategy: + fail-fast: false + env: + R_ORG: rhub + R_IMAGE: ubuntu-gcc-release + R_TAG: latest + steps: + - name: Checkout Arrow + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Fetch Submodules and Tags + run: ci/scripts/util_checkout.sh + - name: Free Up Disk Space + run: ci/scripts/util_cleanup.sh + - name: Cache Docker Volumes + uses: actions/cache@v2 + with: + path: .docker + key: ubuntu-gcc-release-r-${{ hashFiles('cpp/**') }} + restore-keys: ubuntu-gcc-release-r- + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Setup Archery + run: pip install -e dev/archery[docker] + - name: Execute Docker Build + run: | + sudo sysctl -w kernel.core_pattern="core.%e.%p" + ulimit -c unlimited + archery docker run -e LIBARROW_DOWNLOAD=FALSE -e LIBARROW_BUILD=FALSE -e TEST_R_WITH_ARROW=FALSE -e NOT_CRAN=FALSE r + - name: Dump install logs + run: cat r/check/arrow.Rcheck/00install.out + if: always() + - name: Dump test logs + run: cat r/check/arrow.Rcheck/tests/testthat.Rout* + if: always() + - name: Save the test output + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-output + path: r/check/arrow.Rcheck/tests/testthat.Rout* + - name: Docker Push + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + continue-on-error: true + run: archery docker 
push r diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 7851b6b1915..9a2fcf5daec 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -39,6 +39,10 @@ on: - "cpp/**" - "r/**" +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: DOCKER_VOLUME_PREFIX: ".docker/" ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -49,11 +53,12 @@ jobs: name: AMD64 Ubuntu ${{ matrix.ubuntu }} R ${{ matrix.r }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 strategy: fail-fast: false matrix: - r: ["3.6"] - ubuntu: [18.04] + r: ["4.1"] + ubuntu: [20.04] env: R: ${{ matrix.r }} UBUNTU: ${{ matrix.ubuntu }} @@ -84,7 +89,9 @@ jobs: run: | sudo sysctl -w kernel.core_pattern="core.%e.%p" ulimit -c unlimited - archery docker run ubuntu-r + # Setting a non-default and non-probable Marquesas French Polynesia time + # it has both with a .45 offset and very very few people who live there. + archery docker run -e TZ=MART ubuntu-r - name: Dump install logs run: cat r/check/arrow.Rcheck/00install.out if: always() @@ -106,6 +113,7 @@ jobs: name: "${{ matrix.config.org }}/${{ matrix.config.image }}:${{ matrix.config.tag }}" runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -141,7 +149,10 @@ jobs: run: | sudo sysctl -w kernel.core_pattern="core.%e.%p" ulimit -c unlimited - archery docker run r + # Don't set a TZ here to test that case. These builds will have the following warning in them: + # System has not been booted with systemd as init system (PID 1). Can't operate. 
+ # Failed to connect to bus: Host is down + archery docker run -e TZ="" r - name: Dump install logs run: cat r/check/arrow.Rcheck/00install.out if: always() @@ -163,6 +174,7 @@ jobs: name: AMD64 Windows RTools ${{ matrix.rtools }} runs-on: windows-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -207,7 +219,7 @@ jobs: - uses: r-lib/actions/setup-r@master with: rtools-version: 40 - r-version: "4.0" + r-version: "4.1" Ncpus: 2 - uses: r-lib/actions/setup-r@master if: ${{ matrix.rtools == 35 }} @@ -235,7 +247,8 @@ jobs: run: | Sys.setenv( RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "libarrow.zip"), - MAKEFLAGS = paste0("-j", parallel::detectCores()) + MAKEFLAGS = paste0("-j", parallel::detectCores()), + "_R_CHECK_FORCE_SUGGESTS_" = FALSE ) rcmdcheck::rcmdcheck("r", build_args = '--no-build-vignettes', diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index d9430f536b2..067b40aefe9 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -43,6 +43,10 @@ on: - 'cpp/**' - 'ruby/**' +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + env: DOCKER_VOLUME_PREFIX: ".docker/" ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -54,11 +58,11 @@ jobs: name: AMD64 Ubuntu ${{ matrix.ubuntu }} GLib & Ruby runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 40 strategy: fail-fast: false matrix: ubuntu: - - 18.04 - 20.04 env: UBUNTU: ${{ matrix.ubuntu }} @@ -89,7 +93,11 @@ jobs: run: | sudo sysctl -w kernel.core_pattern="core.%e.%p" ulimit -c unlimited - archery docker run ubuntu-ruby + archery docker run \ + -e ARROW_FLIGHT=ON \ + -e Protobuf_SOURCE=BUNDLED \ + -e gRPC_SOURCE=BUNDLED \ + ubuntu-ruby - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' continue-on-error: true @@ -100,10 
+108,12 @@ jobs: name: AMD64 MacOS 10.15 GLib & Ruby runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 40 strategy: fail-fast: false env: ARROW_BUILD_TESTS: OFF + ARROW_FLIGHT: ON ARROW_GANDIVA: ON ARROW_GLIB_DEVELOPMENT_MODE: true ARROW_GLIB_GTK_DOC: true @@ -130,7 +140,6 @@ jobs: run: | rm -f /usr/local/bin/2to3 brew update --preinstall - brew unlink gcc@8 gcc@9 brew bundle --file=cpp/Brewfile brew bundle --file=c_glib/Brewfile - name: Install Ruby Dependencies @@ -171,19 +180,19 @@ jobs: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby runs-on: windows-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 40 strategy: fail-fast: false matrix: mingw-n-bits: - 64 ruby-version: - - 2.6 + - "3.0" env: ARROW_BUILD_SHARED: ON ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: OFF ARROW_BUILD_TYPE: release - ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON ARROW_HDFS: OFF @@ -223,7 +232,7 @@ jobs: shell: bash run: ci/scripts/util_checkout.sh - name: Setup Ruby - uses: actions/setup-ruby@v1 + uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} - name: Upgrade MSYS2 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml deleted file mode 100644 index 6d87e6b6260..00000000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,470 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust - -on: - push: - paths: - - '.github/workflows/rust.yml' - - 'rust/**' - - 'format/Flight.proto' - pull_request: - paths: - - '.github/workflows/rust.yml' - - 'rust/**' - - 'format/Flight.proto' - -jobs: - - # build the library, a compilation step used by multiple steps below - linux-build-lib: - name: Build Libraries on AMD64 Rust ${{ matrix.rust }} - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" - steps: - - uses: actions/checkout@v2 - - name: Cache Cargo - uses: actions/cache@v2 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: /github/home/.cargo - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - # these represent compiled steps of both dependencies and arrow - # and thus are specific for a particular OS, arch and rust version. 
- path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}- - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt - - name: Build Workspace - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust - cargo build - # Ballista is currently not part of the main workspace so requires a separate build step - - name: Build Ballista - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust/ballista/rust - # snmalloc requires cmake so build without default features - cargo build --no-default-features - - # test the crate - linux-test: - name: Test Workspace on AMD64 Rust ${{ matrix.rust }} - needs: [linux-build-lib] - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. 
- RUSTFLAGS: "-C debuginfo=1" - ARROW_TEST_DATA: /__w/arrow/arrow/testing/data - PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt - - name: Run tests - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust - # run tests on all workspace members with default feature list - cargo test - # test datafusion examples - cd datafusion-examples - cargo test --no-default-features - cargo run --example csv_sql - cargo run --example parquet_sql - cd .. - cd arrow - # re-run tests on arrow workspace with additional features - cargo test --features=prettyprint - cargo run --example builders - cargo run --example dynamic_types - cargo run --example read_csv - cargo run --example read_csv_infer_schema - # Ballista is currently not part of the main workspace so requires a separate test step - - name: Run Ballista tests - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust/ballista/rust - # snmalloc requires cmake so build without default features - cargo test --no-default-features - - # test the --features "simd" of the arrow crate. This requires nightly. 
- linux-test-simd: - name: Test SIMD on AMD64 Rust ${{ matrix.rust }} - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [nightly-2020-11-24] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" - ARROW_TEST_DATA: /__w/arrow/arrow/testing/data - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt - - name: Run tests - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust/arrow - cargo test --features "simd" - - windows-and-macos: - name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [windows-latest, macos-latest] - rust: [stable] - steps: - - uses: actions/checkout@v2 - with: - submodules: true - # TODO: this won't cache anything, which is expensive. Setup this action - # with a OS-dependent path. 
- - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt - - name: Run tests - shell: bash - run: | - export ARROW_TEST_DATA=$(pwd)/testing/data - export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data - # do not produce debug symbols to keep memory usage down - export RUSTFLAGS="-C debuginfo=0" - cd rust - cargo test - - clippy: - name: Clippy - needs: [linux-build-lib] - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt clippy - - name: Run clippy - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust - cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names - - miri-checks: - name: MIRI - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [nightly-2021-01-19] - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - uses: actions/cache@v2 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-miri-${{ hashFiles('**/Cargo.lock') }} - - name: 
Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt clippy miri - - name: Run Miri Checks - env: - RUST_BACKTRACE: full - RUST_LOG: 'trace' - run: | - export MIRIFLAGS="-Zmiri-disable-isolation" - cd rust - cargo miri setup - cargo clean - # Ignore MIRI errors until we can get a clean run - cargo miri test || true - - coverage: - name: Coverage - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /home/runner/.cargo - # this key is not equal because the user is different than on a container (runner vs github) - key: cargo-coverage-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /home/runner/target - # this key is not equal because coverage uses different compilation flags. - key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}- - - name: Run coverage - run: | - export CARGO_HOME="/home/runner/.cargo" - export CARGO_TARGET_DIR="/home/runner/target" - - export ARROW_TEST_DATA=$(pwd)/testing/data - export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data - - # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0 - # see https://github.com/xd009642/tarpaulin/issues/618 - cargo install --version 0.16.0 cargo-tarpaulin - cd rust - cargo tarpaulin --out Xml - - name: Report coverage - continue-on-error: true - run: bash <(curl -s https://codecov.io/bash) - - # test FFI against the C-Data interface exposed by pyarrow - pyarrow-integration-test: - name: Test Pyarrow C Data Interface - runs-on: ubuntu-latest - strategy: - matrix: - rust: [stable] - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add 
rustfmt clippy - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /home/runner/.cargo - key: cargo-maturin-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /home/runner/target - # this key is not equal because maturin uses different compilation flags. - key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}- - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - name: Install Python dependencies - run: python -m pip install --upgrade pip setuptools wheel - - name: Run tests - run: | - export CARGO_HOME="/home/runner/.cargo" - export CARGO_TARGET_DIR="/home/runner/target" - - cd rust/arrow-pyarrow-integration-testing - - python -m venv venv - source venv/bin/activate - - pip install maturin==0.8.2 toml==0.10.1 pyarrow==1.0.0 - maturin develop - python -m unittest discover tests - - # test the arrow crate builds against wasm32 in stable rust - wasm32-build: - name: Build wasm32 on AMD64 Rust ${{ matrix.rust }} - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [nightly-2020-11-24] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. 
- RUSTFLAGS: "-C debuginfo=1" - ARROW_TEST_DATA: /__w/arrow/arrow/testing/data - PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache-${{ matrix.rust }} - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup override set ${{ matrix.rust }} - rustup component add rustfmt - rustup target add wasm32-unknown-unknown - - name: Build arrow crate - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust/arrow - cargo build --target wasm32-unknown-unknown - - # test the projects can build without default features - default-build: - name: Check No Defaults on AMD64 Rust ${{ matrix.rust }} - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. 
- RUSTFLAGS: "-C debuginfo=1" - ARROW_TEST_DATA: /__w/arrow/arrow/testing/data - PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache-${{ matrix.rust }} - - name: Setup Rust toolchain - run: | - rustup toolchain install ${{ matrix.rust }} - rustup override set ${{ matrix.rust }} - rustup component add rustfmt - - name: Build arrow crate - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd rust/arrow - cargo check --all-targets --no-default-features diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d2d2d81d68..0718072308a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,21 +29,6 @@ repos: entry: bash -c "git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar && ./dev/release/run-rat.sh arrow-src.tar" always_run: true pass_filenames: false - - id: rustfmt - name: Rust Format - language: system - entry: bash -c "cd rust && cargo +stable fmt --all -- --check" - files: ^rust/.*\.rs$ - types: - - file - - rust - - id: cmake-format - name: CMake Format - language: python - entry: python run-cmake-format.py - types: [cmake] - additional_dependencies: - - cmake_format==0.5.2 - id: hadolint name: Docker Format language: docker_image diff --git a/.travis.yml b/.travis.yml index 2cf70cca982..6a279a2f87b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-dist: bionic +dist: focal language: minimal @@ -43,7 +43,13 @@ jobs: include: - name: "C++ on ARM" os: linux - arch: arm64 + arch: arm64-graviton2 + # This is required for arm64-graviton2. + # https://docs.travis-ci.com/user/multi-cpu-architectures/#example-multi-architecture-build-matrix + group: edge + # This is required for arm64-graviton2. + # https://docs.travis-ci.com/user/multi-cpu-architectures/#testing-on-multiple-cpu-architectures + virt: vm env: <<: *global_env ARCH: arm64v8 @@ -51,28 +57,29 @@ jobs: DOCKER_IMAGE_ID: ubuntu-cpp # ARROW_USE_GLOG=OFF is needed to avoid build error caused by # glog and CMAKE_UNITY_BUILD=ON. - # - # Disable ARROW_S3 because it often causes "No output has - # been received in the last 10m0s, this potentially indicates - # a stalled build or something wrong with the build itself." - # on Travis CI. - # - # Limiting CPP_MAKE_PARALLELISM is required to avoid random compiler - # crashes. DOCKER_RUN_ARGS: >- " -e ARROW_BUILD_STATIC=OFF -e ARROW_ORC=OFF - -e ARROW_S3=OFF -e ARROW_USE_GLOG=OFF -e CMAKE_UNITY_BUILD=ON - -e CPP_MAKE_PARALLELISM=4 " - # The LLVM's APT repository provides only arm64 binaries. + # The LLVM's APT repository doesn't provide arm64 binaries. # We should use LLVM provided by Ubuntu. LLVM: "10" UBUNTU: "20.04" + - name: "Go on ARM" + os: linux + arch: arm64-graviton2 + group: edge + virt: vm + env: + <<: *global_env + ARCH: arm64v8 + ARROW_CI_MODULES: "GO" + DOCKER_IMAGE_ID: debian-go + - name: "C++ on s390x" os: linux arch: s390x @@ -97,11 +104,10 @@ jobs: -e PARQUET_BUILD_EXAMPLES=OFF -e PARQUET_BUILD_EXECUTABLES=OFF -e Protobuf_SOURCE=BUNDLED - -e cares_SOURCE=BUNDLED -e gRPC_SOURCE=BUNDLED " - # The LLVM's APT repository provides only arm64 binaries. - # We should use LLVM provided by Ubuntu. 
+ # The LLVM's APT repository causes download error for s390x binary + # We should use the LLVM provided by the default APT repository LLVM: "10" UBUNTU: "20.04" @@ -125,6 +131,7 @@ jobs: JDK: 11 allow_failures: + - name: "Go on ARM" - name: "Go on s390x" - name: "Java on s390x" @@ -143,14 +150,11 @@ before_install: fi install: - - pip3 install -e dev/archery[docker] + - sudo -H pip3 install --upgrade pip + - sudo -H pip3 install 'docker-compose>=1.27.0' + - sudo -H pip3 install -e dev/archery[docker] script: - - sudo sysctl -w kernel.core_pattern="core.%e.%p" - # This isn't allowed on Travis CI: - # /home/travis/.travis/functions: line 109: ulimit: core file size: cannot modify limit: Operation not permitted - - | - ulimit -c unlimited || : - | archery docker run \ ${DOCKER_RUN_ARGS} \ diff --git a/LICENSE.txt b/LICENSE.txt index 4cec07fd0c9..5d4de206545 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -2218,3 +2218,25 @@ https://github.com/pypa/packaging/ which is made available under both the Apache license v2.0 and the BSD 2-clause license. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/pcg contain code from + +https://github.com/imneme/pcg-cpp + +and have the following copyright notice: + +Copyright 2014-2019 Melissa O'Neill , + and the PCG Project contributors. + +SPDX-License-Identifier: (Apache-2.0 OR MIT) + +Licensed under the Apache License, Version 2.0 (provided in +LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) +or under the MIT license (provided in LICENSE-MIT.txt and at +http://opensource.org/licenses/MIT), at your option. This file may not +be copied, modified, or distributed except according to those terms. + +Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either +express or implied. See your chosen license for details. 
diff --git a/README.md b/README.md index 133018c72df..7d10b81c6e4 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,6 @@ # Apache Arrow -[![Build Status](https://ci.appveyor.com/api/projects/status/github/apache/arrow/branch/master?svg=true)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/arrow/branch/master) -[![Coverage Status](https://codecov.io/gh/apache/arrow/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow?branch=master) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/arrow.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:arrow) [![License](http://img.shields.io/:license-Apache%202-blue.svg)](https://github.com/apache/arrow/blob/master/LICENSE.txt) [![Twitter Follow](https://img.shields.io/twitter/follow/apachearrow.svg?style=social&label=Follow)](https://twitter.com/apachearrow) @@ -53,7 +51,7 @@ Major components of the project include: - [Python libraries](https://github.com/apache/arrow/tree/master/python) - [R libraries](https://github.com/apache/arrow/tree/master/r) - [Ruby libraries](https://github.com/apache/arrow/tree/master/ruby) - - [Rust libraries](https://github.com/apache/arrow/tree/master/rust) + - [Rust libraries](https://github.com/apache/arrow-rs) Arrow is an [Apache Software Foundation](https://www.apache.org) project. Learn more at [arrow.apache.org](https://arrow.apache.org). 
diff --git a/c_glib/Gemfile b/c_glib/Gemfile index 4b570902bcd..6864cfd3244 100644 --- a/c_glib/Gemfile +++ b/c_glib/Gemfile @@ -20,4 +20,4 @@ source "https://rubygems.org/" gem "test-unit" -gem "gobject-introspection" +gem "gobject-introspection", ">= 3.4.7" diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h index ff160452845..03e56516112 100644 --- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h +++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h @@ -21,6 +21,8 @@ #include +#include +#include #include #include #include diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp b/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp index c221825bc2a..65341b9b77e 100644 --- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp +++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp @@ -21,6 +21,8 @@ #include +#include +#include #include #include #include diff --git a/c_glib/arrow-dataset-glib/dataset-factory.cpp b/c_glib/arrow-dataset-glib/dataset-factory.cpp new file mode 100644 index 00000000000..146db69adfc --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset-factory.cpp @@ -0,0 +1,468 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include + +#include +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: dataset-factory + * @section_id: dataset-factory + * @title: Dataset factory related classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetDatasetFactory is a base class for dataset factories. + * + * #GADatasetFileSystemDatasetFactory is a class for + * #GADatasetFileSystemDataset factory. + * + * Since: 5.0.0 + */ + +typedef struct GADatasetDatasetFactoryPrivate_ { + std::shared_ptr factory; +} GADatasetDatasetFactoryPrivate; + +enum { + PROP_DATASET_FACTORY = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDatasetFactory, + gadataset_dataset_factory, + G_TYPE_OBJECT) + +#define GADATASET_DATASET_FACTORY_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_dataset_factory_get_instance_private( \ + GADATASET_DATASET_FACTORY(obj))) + +static void +gadataset_dataset_factory_finalize(GObject *object) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + priv->factory.~shared_ptr(); + G_OBJECT_CLASS(gadataset_dataset_factory_parent_class)->finalize(object); +} + +static void +gadataset_dataset_factory_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATASET_FACTORY: + { + auto arrow_factory_pointer = + static_cast *>( + g_value_get_pointer(value)); + if (arrow_factory_pointer) { + priv->factory = *arrow_factory_pointer; + } + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_dataset_factory_init(GADatasetDatasetFactory *object) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + new(&priv->factory) std::shared_ptr; +} + +static void +gadataset_dataset_factory_class_init(GADatasetDatasetFactoryClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = 
gadataset_dataset_factory_finalize; + gobject_class->set_property = gadataset_dataset_factory_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("dataset-factory", + "Dataset factory", + "The raw " + "std::shared *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATASET_FACTORY, spec); +} + +/** + * gadataset_dataset_factory_finish: + * @factory: A #GADatasetDatasetFactory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetDataset on success, %NULL on error. + * + * Since: 5.0.0 + */ +GADatasetDataset * +gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, + GError **error) +{ + auto arrow_factory = gadataset_dataset_factory_get_raw(factory); + auto arrow_dataset_result = arrow_factory->Finish(); + if (garrow::check(error, arrow_dataset_result, "[dataset-factory][finish]")) { + auto arrow_dataset = *arrow_dataset_result; + return gadataset_dataset_new_raw(&arrow_dataset); + } else { + return NULL; + } +} + + +typedef struct GADatasetFileSystemDatasetFactoryPrivate_ { + GADatasetFileFormat *format; + GArrowFileSystem *file_system; + GList *files; + arrow::dataset::FileSystemFactoryOptions options; +} GADatasetFileSystemDatasetFactoryPrivate; + +enum { + PROP_FORMAT = 1, + PROP_FILE_SYSTEM, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetFactory, + gadataset_file_system_dataset_factory, + GADATASET_TYPE_DATASET_FACTORY) + +#define GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_file_system_dataset_factory_get_instance_private( \ + GADATASET_FILE_SYSTEM_DATASET_FACTORY(obj))) + +static void +gadataset_file_system_dataset_factory_dispose(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + if (priv->format) { + g_object_unref(priv->format); + priv->format = NULL; + } + + if (priv->file_system) 
{ + g_object_unref(priv->file_system); + priv->file_system = NULL; + } + + if (priv->files) { + g_list_free_full(priv->files, g_object_unref); + priv->files = NULL; + } + + G_OBJECT_CLASS( + gadataset_file_system_dataset_factory_parent_class)->dispose(object); +} + +static void +gadataset_file_system_dataset_factory_finalize(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + priv->options.~FileSystemFactoryOptions(); + G_OBJECT_CLASS( + gadataset_file_system_dataset_factory_parent_class)->finalize(object); +} + +static void +gadataset_file_system_dataset_factory_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_factory_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + g_value_set_object(value, priv->format); + break; + case PROP_FILE_SYSTEM: + g_value_set_object(value, priv->file_system); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_factory_init( + GADatasetFileSystemDatasetFactory *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + new(&priv->options) arrow::dataset::FileSystemFactoryOptions; +} + +static void +gadataset_file_system_dataset_factory_class_init( + GADatasetFileSystemDatasetFactoryClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = gadataset_file_system_dataset_factory_dispose; + gobject_class->finalize = 
gadataset_file_system_dataset_factory_finalize; + gobject_class->set_property = gadataset_file_system_dataset_factory_set_property; + gobject_class->get_property = gadataset_file_system_dataset_factory_get_property; + + GParamSpec *spec; + /** + * GADatasetFileSystemDatasetFactory:format: + * + * Format passed to #GADatasetFileSystemDataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("format", + "Format", + "Format passed to GADatasetFileSystemDataset", + GADATASET_TYPE_FILE_FORMAT, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FORMAT, spec); + + /** + * GADatasetFileSystemDatasetFactory:file-system: + * + * File system passed to #GADatasetFileSystemDataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("file-system", + "File system", + "File system passed to GADatasetFileSystemDataset", + GARROW_TYPE_FILE_SYSTEM, + static_cast(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec); +} + +/** + * gadataset_file_system_dataset_factory_new: + * @format: A #GADatasetFileFormat. + * + * + * Returns: A newly created #GADatasetFileSystemDatasetFactory. + * It never returns %NULL. + * + * Since: 5.0.0 + */ +GADatasetFileSystemDatasetFactory * +gadataset_file_system_dataset_factory_new(GADatasetFileFormat *format) +{ + return GADATASET_FILE_SYSTEM_DATASET_FACTORY( + g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY, + "format", format, + NULL)); +} + +/** + * gadataset_file_system_dataset_factory_set_file_system: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @file_system: A #GArrowFileSystem. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise.
+ * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_set_file_system( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileSystem *file_system, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][set-file-system]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system is already set"), + context); + return FALSE; + } + priv->file_system = file_system; + g_object_ref(priv->file_system); + return TRUE; +} + +/** + * gadataset_file_system_dataset_factory_set_file_system_uri: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @uri: An URI for file system. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_set_file_system_uri( + GADatasetFileSystemDatasetFactory *factory, + const gchar *uri, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][set-file-system-uri]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system is already set"), + context); + return FALSE; + } + std::string internal_path; + auto arrow_file_system_result = + arrow::fs::FileSystemFromUri(uri, &internal_path); + if (!garrow::check(error, arrow_file_system_result, context)) { + return FALSE; + } + auto arrow_file_system = *arrow_file_system_result; + auto arrow_file_info_result = arrow_file_system->GetFileInfo(internal_path); + if (!garrow::check(error, arrow_file_info_result, context)) { + return FALSE; + } + priv->file_system = garrow_file_system_new_raw(&arrow_file_system); + auto file_info = garrow_file_info_new_raw(*arrow_file_info_result); + priv->files = g_list_prepend(priv->files, file_info); + return TRUE; +} + +/** + * 
gadataset_file_system_dataset_factory_add_path: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @path: A path to be added. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_add_path( + GADatasetFileSystemDatasetFactory *factory, + const gchar *path, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][add-path]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (!priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system isn't set"), + context); + return FALSE; + } + auto arrow_file_system = garrow_file_system_get_raw(priv->file_system); + auto arrow_file_info_result = arrow_file_system->GetFileInfo(path); + if (!garrow::check(error, arrow_file_info_result, context)) { + return FALSE; + } + auto file_info = garrow_file_info_new_raw(*arrow_file_info_result); + priv->files = g_list_prepend(priv->files, file_info); + return TRUE; +} + +/** + * gadataset_file_system_dataset_factory_finish: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetFileSystemDataset on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GADatasetFileSystemDataset * +gadataset_file_system_dataset_factory_finish( + GADatasetFileSystemDatasetFactory *factory, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][finish]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (!priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system isn't set"), + context); + return NULL; + } + auto arrow_file_system = garrow_file_system_get_raw(priv->file_system); + auto arrow_format = gadataset_file_format_get_raw(priv->format); + std::vector arrow_files; + priv->files = g_list_reverse(priv->files); + for (auto node = priv->files; node; node = node->next) { + auto file = GARROW_FILE_INFO(node->data); + arrow_files.push_back(*garrow_file_info_get_raw(file)); + } + priv->files = g_list_reverse(priv->files); + auto arrow_factory_result = + arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system, + arrow_files, + arrow_format, + priv->options); + if (!garrow::check(error, arrow_factory_result, context)) { + return NULL; + } + auto arrow_dataset_result = (*arrow_factory_result)->Finish(); + if (!garrow::check(error, arrow_dataset_result, context)) { + return NULL; + } + auto arrow_dataset = *arrow_dataset_result; + return GADATASET_FILE_SYSTEM_DATASET( + gadataset_dataset_new_raw(&arrow_dataset, + "dataset", &arrow_dataset, + "file-system", priv->file_system, + "format", priv->format, + NULL)); +} + + +G_END_DECLS + +std::shared_ptr +gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(factory); + return priv->factory; +} diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h new file mode 100644 index 00000000000..e2ee3ed9806 --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset-factory.h @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +G_BEGIN_DECLS + +#define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetDatasetFactory, + gadataset_dataset_factory, + GADATASET, + DATASET_FACTORY, + GObject) +struct _GADatasetDatasetFactoryClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetDataset * +gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, + GError **error); + + +#define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \ + (gadataset_file_system_dataset_factory_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory, + gadataset_file_system_dataset_factory, + GADATASET, + FILE_SYSTEM_DATASET_FACTORY, + GADatasetDatasetFactory) +struct _GADatasetFileSystemDatasetFactoryClass +{ + GADatasetDatasetFactoryClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetFileSystemDatasetFactory * +gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format); +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_set_file_system( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileSystem *file_system, + GError **error); +gboolean +gadataset_file_system_dataset_factory_set_file_system_uri( + 
GADatasetFileSystemDatasetFactory *factory, + const gchar *uri, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_path( + GADatasetFileSystemDatasetFactory *factory, + const gchar *path, + GError **error); +/* +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_file( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileInfo *file, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_selector( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileSelector *selector, + GError **error); +*/ + +GARROW_AVAILABLE_IN_5_0 +GADatasetFileSystemDataset * +gadataset_file_system_dataset_factory_finish( + GADatasetFileSystemDatasetFactory *factory, + GError **error); + + +G_END_DECLS diff --git a/c_glib/arrow-dataset-glib/dataset-factory.hpp b/c_glib/arrow-dataset-glib/dataset-factory.hpp new file mode 100644 index 00000000000..114db35bc59 --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset-factory.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +#pragma once + +#include + +#include + +std::shared_ptr +gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory); diff --git a/c_glib/arrow-dataset-glib/dataset.cpp b/c_glib/arrow-dataset-glib/dataset.cpp new file mode 100644 index 00000000000..3bd62f99ef3 --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset.cpp @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: dataset + * @section_id: dataset + * @title: Dataset related classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetDataset is a base class for datasets. + * + * #GADatasetFileSystemDataset is a class for file system dataset. + * + * #GADatasetFileFormat is a base class for file formats. + * + * #GADatasetCSVFileFormat is a class for CSV file format. + * + * #GADatasetIPCFileFormat is a class for IPC file format. + * + * #GADatasetParquetFileFormat is a class for Apache Parquet file format. 
+ * + * Since: 5.0.0 + */ + +typedef struct GADatasetDatasetPrivate_ { + std::shared_ptr dataset; +} GADatasetDatasetPrivate; + +enum { + PROP_DATASET = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDataset, + gadataset_dataset, + G_TYPE_OBJECT) + +#define GADATASET_DATASET_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_dataset_get_instance_private( \ + GADATASET_DATASET(obj))) + +static void +gadataset_dataset_finalize(GObject *object) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + priv->dataset.~shared_ptr(); + G_OBJECT_CLASS(gadataset_dataset_parent_class)->finalize(object); +} + +static void +gadataset_dataset_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATASET: + priv->dataset = + *static_cast *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_dataset_init(GADatasetDataset *object) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + new(&priv->dataset) std::shared_ptr; +} + +static void +gadataset_dataset_class_init(GADatasetDatasetClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_dataset_finalize; + gobject_class->set_property = gadataset_dataset_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("dataset", + "Dataset", + "The raw " + "std::shared *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATASET, spec); +} + +/** + * gadataset_dataset_begin_scan: + * @dataset: A #GADatasetDataset. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetScannerBuilder on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GADatasetScannerBuilder * +gadataset_dataset_begin_scan(GADatasetDataset *dataset, + GError **error) +{ + return gadataset_scanner_builder_new(dataset, error); +} + +/** + * gadataset_dataset_to_table: + * @dataset: A #GADatasetDataset. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A loaded #GArrowTable on success, %NULL on error. + * + * Since: 5.0.0 + */ +GArrowTable * +gadataset_dataset_to_table(GADatasetDataset *dataset, + GError **error) +{ + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (!garrow::check(error, + arrow_scanner_builder_result, + "[dataset][to-table]")) { + return NULL; + } + auto arrow_scanner_builder = *arrow_scanner_builder_result; + auto arrow_scanner_result = arrow_scanner_builder->Finish(); + if (!garrow::check(error, + arrow_scanner_result, + "[dataset][to-table]")) { + return NULL; + } + auto arrow_scanner = *arrow_scanner_result; + auto arrow_table_result = arrow_scanner->ToTable(); + if (!garrow::check(error, + arrow_scanner_result, + "[dataset][to-table]")) { + return NULL; + } + return garrow_table_new_raw(&(*arrow_table_result)); +} + +/** + * gadataset_dataset_get_type_name: + * @dataset: A #GADatasetDataset. + * + * Returns: The type name of @dataset. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 5.0.0 + */ +gchar * +gadataset_dataset_get_type_name(GADatasetDataset *dataset) +{ + const auto arrow_dataset = gadataset_dataset_get_raw(dataset); + const auto &type_name = arrow_dataset->type_name(); + return g_strndup(type_name.data(), type_name.size()); +} + + +typedef struct GADatasetFileSystemDatasetPrivate_ { + GADatasetFileFormat *format; + GArrowFileSystem *file_system; +} GADatasetFileSystemDatasetPrivate; + +enum { + PROP_FORMAT = 1, + PROP_FILE_SYSTEM, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDataset, + gadataset_file_system_dataset, + GADATASET_TYPE_DATASET) + +#define GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_file_system_dataset_get_instance_private( \ + GADATASET_FILE_SYSTEM_DATASET(obj))) + +static void +gadataset_file_system_dataset_dispose(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + if (priv->format) { + g_object_unref(priv->format); + priv->format = NULL; + } + + if (priv->file_system) { + g_object_unref(priv->file_system); + priv->file_system = NULL; + } + + G_OBJECT_CLASS(gadataset_file_system_dataset_parent_class)->dispose(object); +} + +static void +gadataset_file_system_dataset_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value)); + break; + case PROP_FILE_SYSTEM: + priv->file_system = GARROW_FILE_SYSTEM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + g_value_set_object(value, priv->format); + break; + case 
PROP_FILE_SYSTEM: + g_value_set_object(value, priv->file_system); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_init(GADatasetFileSystemDataset *object) +{ +} + +static void +gadataset_file_system_dataset_class_init(GADatasetFileSystemDatasetClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = gadataset_file_system_dataset_dispose; + gobject_class->set_property = gadataset_file_system_dataset_set_property; + gobject_class->get_property = gadataset_file_system_dataset_get_property; + + GParamSpec *spec; + /** + * GADatasetFileSystemDataset:format: + * + * Format of the dataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("format", + "Format", + "Format of the dataset", + GADATASET_TYPE_FILE_FORMAT, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FORMAT, spec); + + /** + * GADatasetFileSystemDataset:file-system: + * + * File system of the dataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("file-system", + "File system", + "File system of the dataset", + GARROW_TYPE_FILE_SYSTEM, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec); +} + + +G_END_DECLS + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr *arrow_dataset) +{ + return gadataset_dataset_new_raw(arrow_dataset, + "dataset", arrow_dataset, + NULL); +} + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr *arrow_dataset, + const gchar *first_property_name, + ...) 
+{ + va_list args; + va_start(args, first_property_name); + auto array = gadataset_dataset_new_raw_valist(arrow_dataset, + first_property_name, + args); + va_end(args); + return array; +} + +GADatasetDataset * +gadataset_dataset_new_raw_valist( + std::shared_ptr *arrow_dataset, + const gchar *first_property_name, + va_list args) +{ + GType type = GADATASET_TYPE_DATASET; + const auto type_name = (*arrow_dataset)->type_name(); + if (type_name == "filesystem") { + type = GADATASET_TYPE_FILE_SYSTEM_DATASET; + } + return GADATASET_DATASET(g_object_new_valist(type, + first_property_name, + args)); +} + +std::shared_ptr +gadataset_dataset_get_raw(GADatasetDataset *dataset) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(dataset); + return priv->dataset; +} diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h new file mode 100644 index 00000000000..97cf35d74d7 --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset.h @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include + +G_BEGIN_DECLS + +typedef struct _GADatasetScannerBuilder GADatasetScannerBuilder; + +#define GADATASET_TYPE_DATASET (gadataset_dataset_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetDataset, + gadataset_dataset, + GADATASET, + DATASET, + GObject) +struct _GADatasetDatasetClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetScannerBuilder * +gadataset_dataset_begin_scan(GADatasetDataset *dataset, + GError **error); +GARROW_AVAILABLE_IN_5_0 +GArrowTable * +gadataset_dataset_to_table(GADatasetDataset *dataset, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gchar * +gadataset_dataset_get_type_name(GADatasetDataset *dataset); + + +#define GADATASET_TYPE_FILE_SYSTEM_DATASET \ + (gadataset_file_system_dataset_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset, + gadataset_file_system_dataset, + GADATASET, + FILE_SYSTEM_DATASET, + GADatasetDataset) +struct _GADatasetFileSystemDatasetClass +{ + GADatasetDatasetClass parent_class; +}; + + +G_END_DECLS diff --git a/c_glib/arrow-dataset-glib/dataset.hpp b/c_glib/arrow-dataset-glib/dataset.hpp new file mode 100644 index 00000000000..94dddd2eb7a --- /dev/null +++ b/c_glib/arrow-dataset-glib/dataset.hpp @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr *arrow_dataset); +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr *arrow_dataset, + const gchar *first_property_name, + ...); +GADatasetDataset * +gadataset_dataset_new_raw_valist( + std::shared_ptr *arrow_dataset, + const gchar *first_property_name, + va_list arg); +std::shared_ptr +gadataset_dataset_get_raw(GADatasetDataset *dataset); + +GADatasetFileFormat * +gadataset_file_format_new_raw( + std::shared_ptr *arrow_format); +std::shared_ptr +gadataset_dataset_get_raw(GADatasetDataset *dataset); + + diff --git a/c_glib/arrow-dataset-glib/file-format.cpp b/c_glib/arrow-dataset-glib/file-format.cpp index 7f10c9debbe..43f6a198f23 100644 --- a/c_glib/arrow-dataset-glib/file-format.cpp +++ b/c_glib/arrow-dataset-glib/file-format.cpp @@ -29,56 +29,57 @@ G_BEGIN_DECLS * @title: File format classes * @include: arrow-dataset-glib/arrow-dataset-glib.h * - * #GADFileFormat is a base class for file format classes. + * #GADatasetFileFormat is a base class for file format classes. * - * #GADCSVFileFormat is a class for CSV file format. + * #GADatasetCSVFileFormat is a class for CSV file format. * - * #GADIPCFileFormat is a class for IPC file format. + * #GADatasetIPCFileFormat is a class for IPC file format. * - * #GADParquetFileFormat is a class for Parquet file format. + * #GADatasetParquetFileFormat is a class for Parquet file format. 
* - * * Since: 3.0.0 + * Since: 3.0.0 */ -typedef struct GADFileFormatPrivate_ { +typedef struct GADatasetFileFormatPrivate_ { std::shared_ptr file_format; -} GADFileFormatPrivate; +} GADatasetFileFormatPrivate; enum { PROP_FILE_FORMAT = 1, }; -G_DEFINE_TYPE_WITH_PRIVATE(GADFileFormat, - gad_file_format, +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileFormat, + gadataset_file_format, G_TYPE_OBJECT) -#define GAD_FILE_FORMAT_GET_PRIVATE(obj) \ - static_cast( \ - gad_file_format_get_instance_private( \ - GAD_FILE_FORMAT(obj))) +#define GADATASET_FILE_FORMAT_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_file_format_get_instance_private( \ + GADATASET_FILE_FORMAT(obj))) static void -gad_file_format_finalize(GObject *object) +gadataset_file_format_finalize(GObject *object) { - auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object); + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); priv->file_format.~shared_ptr(); - G_OBJECT_CLASS(gad_file_format_parent_class)->finalize(object); + G_OBJECT_CLASS(gadataset_file_format_parent_class)->finalize(object); } static void -gad_file_format_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) +gadataset_file_format_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) { - auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object); + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); switch (prop_id) { case PROP_FILE_FORMAT: priv->file_format = - *static_cast *>(g_value_get_pointer(value)); + *static_cast *>( + g_value_get_pointer(value)); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -87,19 +88,19 @@ gad_file_format_set_property(GObject *object, } static void -gad_file_format_init(GADFileFormat *object) +gadataset_file_format_init(GADatasetFileFormat *object) { - auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object); + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); new(&priv->file_format) std::shared_ptr; } static void 
-gad_file_format_class_init(GADFileFormatClass *klass) +gadataset_file_format_class_init(GADatasetFileFormatClass *klass) { auto gobject_class = G_OBJECT_CLASS(klass); - gobject_class->finalize = gad_file_format_finalize; - gobject_class->set_property = gad_file_format_set_property; + gobject_class->finalize = gadataset_file_format_finalize; + gobject_class->set_property = gadataset_file_format_set_property; GParamSpec *spec; spec = g_param_spec_pointer("file-format", @@ -111,8 +112,8 @@ gad_file_format_class_init(GADFileFormatClass *klass) } /** - * gad_file_format_get_type_name: - * @file_format: A #GADFileFormat. + * gadataset_file_format_get_type_name: + * @file_format: A #GADatasetFileFormat. * * Returns: The type name of @file_format. * @@ -121,145 +122,149 @@ gad_file_format_class_init(GADFileFormatClass *klass) * Since: 3.0.0 */ gchar * -gad_file_format_get_type_name(GADFileFormat *file_format) +gadataset_file_format_get_type_name(GADatasetFileFormat *file_format) { - const auto arrow_file_format = gad_file_format_get_raw(file_format); + const auto arrow_file_format = gadataset_file_format_get_raw(file_format); const auto &type_name = arrow_file_format->type_name(); return g_strndup(type_name.data(), type_name.size()); } /** - * gad_file_format_equal: - * @file_format: A #GADFileFormat. - * @other_file_format: A #GADFileFormat to be compared. + * gadataset_file_format_equal: + * @file_format: A #GADatasetFileFormat. + * @other_file_format: A #GADatasetFileFormat to be compared. * * Returns: %TRUE if they are the same content file format, %FALSE otherwise. 
* * Since: 3.0.0 */ gboolean -gad_file_format_equal(GADFileFormat *file_format, - GADFileFormat *other_file_format) +gadataset_file_format_equal(GADatasetFileFormat *file_format, + GADatasetFileFormat *other_file_format) { - const auto arrow_file_format = gad_file_format_get_raw(file_format); - const auto arrow_other_file_format = gad_file_format_get_raw(other_file_format); + const auto arrow_file_format = gadataset_file_format_get_raw(file_format); + const auto arrow_other_file_format = + gadataset_file_format_get_raw(other_file_format); return arrow_file_format->Equals(*arrow_other_file_format); } -G_DEFINE_TYPE(GADCSVFileFormat, - gad_csv_file_format, - GAD_TYPE_FILE_FORMAT) +G_DEFINE_TYPE(GADatasetCSVFileFormat, + gadataset_csv_file_format, + GADATASET_TYPE_FILE_FORMAT) static void -gad_csv_file_format_init(GADCSVFileFormat *object) +gadataset_csv_file_format_init(GADatasetCSVFileFormat *object) { } static void -gad_csv_file_format_class_init(GADCSVFileFormatClass *klass) +gadataset_csv_file_format_class_init(GADatasetCSVFileFormatClass *klass) { } /** - * gad_csv_file_format_new: + * gadataset_csv_file_format_new: * * Returns: The newly created CSV file format. 
* * Since: 3.0.0 */ -GADCSVFileFormat * -gad_csv_file_format_new(void) +GADatasetCSVFileFormat * +gadataset_csv_file_format_new(void) { std::shared_ptr arrow_file_format = std::make_shared(); - return GAD_CSV_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format)); + return GADATASET_CSV_FILE_FORMAT( + gadataset_file_format_new_raw(&arrow_file_format)); } -G_DEFINE_TYPE(GADIPCFileFormat, - gad_ipc_file_format, - GAD_TYPE_FILE_FORMAT) +G_DEFINE_TYPE(GADatasetIPCFileFormat, + gadataset_ipc_file_format, + GADATASET_TYPE_FILE_FORMAT) static void -gad_ipc_file_format_init(GADIPCFileFormat *object) +gadataset_ipc_file_format_init(GADatasetIPCFileFormat *object) { } static void -gad_ipc_file_format_class_init(GADIPCFileFormatClass *klass) +gadataset_ipc_file_format_class_init(GADatasetIPCFileFormatClass *klass) { } /** - * gad_ipc_file_format_new: + * gadataset_ipc_file_format_new: * * Returns: The newly created IPC file format. * * Since: 3.0.0 */ -GADIPCFileFormat * -gad_ipc_file_format_new(void) +GADatasetIPCFileFormat * +gadataset_ipc_file_format_new(void) { std::shared_ptr arrow_file_format = std::make_shared(); - return GAD_IPC_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format)); + return GADATASET_IPC_FILE_FORMAT( + gadataset_file_format_new_raw(&arrow_file_format)); } -G_DEFINE_TYPE(GADParquetFileFormat, - gad_parquet_file_format, - GAD_TYPE_FILE_FORMAT) +G_DEFINE_TYPE(GADatasetParquetFileFormat, + gadataset_parquet_file_format, + GADATASET_TYPE_FILE_FORMAT) static void -gad_parquet_file_format_init(GADParquetFileFormat *object) +gadataset_parquet_file_format_init(GADatasetParquetFileFormat *object) { } static void -gad_parquet_file_format_class_init(GADParquetFileFormatClass *klass) +gadataset_parquet_file_format_class_init(GADatasetParquetFileFormatClass *klass) { } /** - * gad_parquet_file_format_new: + * gadataset_parquet_file_format_new: * * Returns: The newly created Parquet file format. 
* * Since: 3.0.0 */ -GADParquetFileFormat * -gad_parquet_file_format_new(void) +GADatasetParquetFileFormat * +gadataset_parquet_file_format_new(void) { std::shared_ptr arrow_file_format = std::make_shared(); - return GAD_PARQUET_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format)); + return GADATASET_PARQUET_FILE_FORMAT( + gadataset_file_format_new_raw(&arrow_file_format)); } G_END_DECLS -GADFileFormat * -gad_file_format_new_raw( +GADatasetFileFormat * +gadataset_file_format_new_raw( std::shared_ptr *arrow_file_format) { - GType type = GAD_TYPE_FILE_FORMAT; + GType type = GADATASET_TYPE_FILE_FORMAT; const auto &type_name = (*arrow_file_format)->type_name(); if (type_name == "csv") { - type = GAD_TYPE_CSV_FILE_FORMAT; + type = GADATASET_TYPE_CSV_FILE_FORMAT; } else if (type_name == "ipc") { - type = GAD_TYPE_IPC_FILE_FORMAT; + type = GADATASET_TYPE_IPC_FILE_FORMAT; } else if (type_name == "parquet") { - type = GAD_TYPE_PARQUET_FILE_FORMAT; + type = GADATASET_TYPE_PARQUET_FILE_FORMAT; } - return GAD_FILE_FORMAT(g_object_new(type, - "file-format", arrow_file_format, - NULL)); + return GADATASET_FILE_FORMAT(g_object_new(type, + "file-format", arrow_file_format, + NULL)); } std::shared_ptr -gad_file_format_get_raw(GADFileFormat *file_format) +gadataset_file_format_get_raw(GADatasetFileFormat *file_format) { - auto priv = GAD_FILE_FORMAT_GET_PRIVATE(file_format); + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(file_format); return priv->file_format; } diff --git a/c_glib/arrow-dataset-glib/file-format.h b/c_glib/arrow-dataset-glib/file-format.h index f77addc8da6..7a6f46f56e9 100644 --- a/c_glib/arrow-dataset-glib/file-format.h +++ b/c_glib/arrow-dataset-glib/file-format.h @@ -23,70 +23,71 @@ G_BEGIN_DECLS -#define GAD_TYPE_FILE_FORMAT (gad_file_format_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADFileFormat, - gad_file_format, - GAD, +#define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFileFormat, + 
gadataset_file_format, + GADATASET, FILE_FORMAT, GObject) -struct _GADFileFormatClass +struct _GADatasetFileFormatClass { GObjectClass parent_class; }; GARROW_AVAILABLE_IN_3_0 gchar * -gad_file_format_get_type_name(GADFileFormat *file_format); +gadataset_file_format_get_type_name(GADatasetFileFormat *file_format); GARROW_AVAILABLE_IN_3_0 gboolean -gad_file_format_equal(GADFileFormat *file_format, - GADFileFormat *other_file_format); +gadataset_file_format_equal(GADatasetFileFormat *file_format, + GADatasetFileFormat *other_file_format); -#define GAD_TYPE_CSV_FILE_FORMAT (gad_csv_file_format_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADCSVFileFormat, - gad_csv_file_format, - GAD, +#define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat, + gadataset_csv_file_format, + GADATASET, CSV_FILE_FORMAT, - GADFileFormat) -struct _GADCSVFileFormatClass + GADatasetFileFormat) +struct _GADatasetCSVFileFormatClass { - GADFileFormatClass parent_class; + GADatasetFileFormatClass parent_class; }; GARROW_AVAILABLE_IN_3_0 -GADCSVFileFormat *gad_csv_file_format_new(void); +GADatasetCSVFileFormat *gadataset_csv_file_format_new(void); -#define GAD_TYPE_IPC_FILE_FORMAT (gad_ipc_file_format_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADIPCFileFormat, - gad_ipc_file_format, - GAD, +#define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat, + gadataset_ipc_file_format, + GADATASET, IPC_FILE_FORMAT, - GADFileFormat) -struct _GADIPCFileFormatClass + GADatasetFileFormat) +struct _GADatasetIPCFileFormatClass { - GADFileFormatClass parent_class; + GADatasetFileFormatClass parent_class; }; GARROW_AVAILABLE_IN_3_0 -GADIPCFileFormat *gad_ipc_file_format_new(void); +GADatasetIPCFileFormat *gadataset_ipc_file_format_new(void); -#define GAD_TYPE_PARQUET_FILE_FORMAT (gad_parquet_file_format_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADParquetFileFormat, - 
gad_parquet_file_format, - GAD, +#define GADATASET_TYPE_PARQUET_FILE_FORMAT \ + (gadataset_parquet_file_format_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat, + gadataset_parquet_file_format, + GADATASET, PARQUET_FILE_FORMAT, - GADFileFormat) -struct _GADParquetFileFormatClass + GADatasetFileFormat) +struct _GADatasetParquetFileFormatClass { - GADFileFormatClass parent_class; + GADatasetFileFormatClass parent_class; }; GARROW_AVAILABLE_IN_3_0 -GADParquetFileFormat *gad_parquet_file_format_new(void); +GADatasetParquetFileFormat *gadataset_parquet_file_format_new(void); G_END_DECLS diff --git a/c_glib/arrow-dataset-glib/file-format.hpp b/c_glib/arrow-dataset-glib/file-format.hpp index e7e73f4ed98..5dfb20b3caa 100644 --- a/c_glib/arrow-dataset-glib/file-format.hpp +++ b/c_glib/arrow-dataset-glib/file-format.hpp @@ -23,8 +23,8 @@ #include -GADFileFormat * -gad_file_format_new_raw( +GADatasetFileFormat * +gadataset_file_format_new_raw( std::shared_ptr *arrow_file_format); std::shared_ptr -gad_file_format_get_raw(GADFileFormat *file_format); +gadataset_file_format_get_raw(GADatasetFileFormat *file_format); diff --git a/c_glib/arrow-dataset-glib/fragment.cpp b/c_glib/arrow-dataset-glib/fragment.cpp index 515a370d8e6..f2f0cd1c3e9 100644 --- a/c_glib/arrow-dataset-glib/fragment.cpp +++ b/c_glib/arrow-dataset-glib/fragment.cpp @@ -30,54 +30,55 @@ G_BEGIN_DECLS * @title: Fragment classes * @include: arrow-dataset-glib/arrow-dataset-glib.h * - * #GADFragment is a base class for all fragment classes. + * #GADatasetFragment is a base class for all fragment classes. * - * #GADInMemoryFragment is a class for in-memory fragment. + * #GADatasetInMemoryFragment is a class for in-memory fragment. 
* * Since: 4.0.0 */ /* arrow::dataset::Fragment */ -typedef struct GADFragmentPrivate_ { +typedef struct GADatasetFragmentPrivate_ { std::shared_ptr fragment; -} GADFragmentPrivate; +} GADatasetFragmentPrivate; enum { PROP_FRAGMENT = 1, }; -G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADFragment, - gad_fragment, +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetFragment, + gadataset_fragment, G_TYPE_OBJECT) -#define GAD_FRAGMENT_GET_PRIVATE(obj) \ - static_cast( \ - gad_fragment_get_instance_private( \ - GAD_FRAGMENT(obj))) +#define GADATASET_FRAGMENT_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_fragment_get_instance_private( \ + GADATASET_FRAGMENT(obj))) static void -gad_fragment_finalize(GObject *object) +gadataset_fragment_finalize(GObject *object) { - auto priv = GAD_FRAGMENT_GET_PRIVATE(object); + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); priv->fragment.~shared_ptr(); - G_OBJECT_CLASS(gad_fragment_parent_class)->finalize(object); + G_OBJECT_CLASS(gadataset_fragment_parent_class)->finalize(object); } static void -gad_fragment_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) +gadataset_fragment_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) { - auto priv = GAD_FRAGMENT_GET_PRIVATE(object); + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); switch (prop_id) { case PROP_FRAGMENT: priv->fragment = - *static_cast *>(g_value_get_pointer(value)); + *static_cast *>( + g_value_get_pointer(value)); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -86,19 +87,19 @@ gad_fragment_set_property(GObject *object, } static void -gad_fragment_init(GADFragment *object) +gadataset_fragment_init(GADatasetFragment *object) { - auto priv = GAD_FRAGMENT_GET_PRIVATE(object); + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); new(&priv->fragment) std::shared_ptr; } static void -gad_fragment_class_init(GADFragmentClass *klass) 
+gadataset_fragment_class_init(GADatasetFragmentClass *klass) { auto gobject_class = G_OBJECT_CLASS(klass); - gobject_class->finalize = gad_fragment_finalize; - gobject_class->set_property = gad_fragment_set_property; + gobject_class->finalize = gadataset_fragment_finalize; + gobject_class->set_property = gadataset_fragment_set_property; GParamSpec *spec; spec = g_param_spec_pointer("fragment", @@ -111,35 +112,35 @@ gad_fragment_class_init(GADFragmentClass *klass) /* arrow::dataset::InMemoryFragment */ -G_DEFINE_TYPE(GADInMemoryFragment, - gad_in_memory_fragment, - GAD_TYPE_FRAGMENT) +G_DEFINE_TYPE(GADatasetInMemoryFragment, + gadataset_in_memory_fragment, + GADATASET_TYPE_FRAGMENT) static void -gad_in_memory_fragment_init(GADInMemoryFragment *object) +gadataset_in_memory_fragment_init(GADatasetInMemoryFragment *object) { } static void -gad_in_memory_fragment_class_init(GADInMemoryFragmentClass *klass) +gadataset_in_memory_fragment_class_init(GADatasetInMemoryFragmentClass *klass) { } /** - * gad_in_memory_fragment_new: + * gadataset_in_memory_fragment_new: * @schema: A #GArrowSchema. * @record_batches: (array length=n_record_batches): * (element-type GArrowRecordBatch): The record batches of the table. * @n_record_batches: The number of record batches. * - * Returns: A newly created #GADInMemoryFragment. + * Returns: A newly created #GADatasetInMemoryFragment. 
* * Since: 4.0.0 */ -GADInMemoryFragment * -gad_in_memory_fragment_new(GArrowSchema *schema, - GArrowRecordBatch **record_batches, - gsize n_record_batches) +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches) { auto arrow_schema = garrow_schema_get_raw(schema); std::vector> arrow_record_batches; @@ -151,34 +152,36 @@ gad_in_memory_fragment_new(GArrowSchema *schema, auto arrow_in_memory_fragment = std::make_shared(arrow_schema, arrow_record_batches); - return gad_in_memory_fragment_new_raw(&arrow_in_memory_fragment); + return gadataset_in_memory_fragment_new_raw(&arrow_in_memory_fragment); } G_END_DECLS -GADFragment * -gad_fragment_new_raw(std::shared_ptr *arrow_fragment) +GADatasetFragment * +gadataset_fragment_new_raw( + std::shared_ptr *arrow_fragment) { auto fragment = - GAD_FRAGMENT(g_object_new(GAD_TYPE_FRAGMENT, - "fragment", arrow_fragment, - NULL)); + GADATASET_FRAGMENT(g_object_new(GADATASET_TYPE_FRAGMENT, + "fragment", arrow_fragment, + NULL)); return fragment; } std::shared_ptr -gad_fragment_get_raw(GADFragment *fragment) +gadataset_fragment_get_raw(GADatasetFragment *fragment) { - auto priv = GAD_FRAGMENT_GET_PRIVATE(fragment); + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(fragment); return priv->fragment; } -GADInMemoryFragment * -gad_in_memory_fragment_new_raw(std::shared_ptr *arrow_fragment) +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new_raw( + std::shared_ptr *arrow_fragment) { auto fragment = - GAD_IN_MEMORY_FRAGMENT(g_object_new(GAD_TYPE_IN_MEMORY_FRAGMENT, - "fragment", arrow_fragment, - NULL)); + GADATASET_IN_MEMORY_FRAGMENT(g_object_new(GADATASET_TYPE_IN_MEMORY_FRAGMENT, + "fragment", arrow_fragment, + NULL)); return fragment; } diff --git a/c_glib/arrow-dataset-glib/fragment.h b/c_glib/arrow-dataset-glib/fragment.h index c0ee8769db1..9376b6cf3ee 100644 --- a/c_glib/arrow-dataset-glib/fragment.h +++ 
b/c_glib/arrow-dataset-glib/fragment.h @@ -25,34 +25,35 @@ G_BEGIN_DECLS /* arrow::dataset::Fragment */ -#define GAD_TYPE_FRAGMENT (gad_fragment_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADFragment, - gad_fragment, - GAD, +#define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFragment, + gadataset_fragment, + GADATASET, FRAGMENT, GObject) -struct _GADFragmentClass +struct _GADatasetFragmentClass { GObjectClass parent_class; }; /* arrow::dataset::InMemoryFragment */ -#define GAD_TYPE_IN_MEMORY_FRAGMENT (gad_in_memory_fragment_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADInMemoryFragment, - gad_in_memory_fragment, - GAD, +#define GADATASET_TYPE_IN_MEMORY_FRAGMENT \ + (gadataset_in_memory_fragment_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment, + gadataset_in_memory_fragment, + GADATASET, IN_MEMORY_FRAGMENT, - GADFragment) -struct _GADInMemoryFragmentClass + GADatasetFragment) +struct _GADatasetInMemoryFragmentClass { - GADFragmentClass parent_class; + GADatasetFragmentClass parent_class; }; GARROW_AVAILABLE_IN_4_0 -GADInMemoryFragment * -gad_in_memory_fragment_new(GArrowSchema *schema, - GArrowRecordBatch **record_batches, - gsize n_record_batches); +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches); G_END_DECLS diff --git a/c_glib/arrow-dataset-glib/fragment.hpp b/c_glib/arrow-dataset-glib/fragment.hpp index 441b7c99cb8..904f8365396 100644 --- a/c_glib/arrow-dataset-glib/fragment.hpp +++ b/c_glib/arrow-dataset-glib/fragment.hpp @@ -24,10 +24,12 @@ #include std::shared_ptr -gad_fragment_get_raw(GADFragment *fragment); +gadataset_fragment_get_raw(GADatasetFragment *fragment); -GADFragment* -gad_fragment_new_raw(std::shared_ptr *arrow_fragment); +GADatasetFragment* +gadataset_fragment_new_raw( + std::shared_ptr *arrow_fragment); -GADInMemoryFragment* -gad_in_memory_fragment_new_raw(std::shared_ptr *arrow_fragment); 
+GADatasetInMemoryFragment* +gadataset_in_memory_fragment_new_raw( + std::shared_ptr *arrow_fragment); diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 83b57504f81..b3f617330cf 100644 --- a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -18,6 +18,8 @@ # under the License. sources = files( + 'dataset-factory.cpp', + 'dataset.cpp', 'file-format.cpp', 'fragment.cpp', 'scanner.cpp', @@ -25,6 +27,8 @@ sources = files( c_headers = files( 'arrow-dataset-glib.h', + 'dataset-factory.h', + 'dataset.h', 'file-format.h', 'fragment.h', 'scanner.h', @@ -32,6 +36,8 @@ c_headers = files( cpp_headers = files( 'arrow-dataset-glib.hpp', + 'dataset-factory.hpp', + 'dataset.hpp', 'file-format.hpp', 'fragment.hpp', 'scanner.hpp', @@ -68,8 +74,8 @@ if have_gi sources: sources + c_headers, namespace: 'ArrowDataset', nsversion: api_version, - identifier_prefix: 'GAD', - symbol_prefix: 'gad', + identifier_prefix: 'GADataset', + symbol_prefix: 'gadataset', export_packages: 'arrow-dataset-glib', includes: [ 'Arrow-1.0', diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp index 36701ca373a..efa2a5c3287 100644 --- a/c_glib/arrow-dataset-glib/scanner.cpp +++ b/c_glib/arrow-dataset-glib/scanner.cpp @@ -17,13 +17,10 @@ * under the License. */ -#include - #include -#include -#include +#include -#include +#include #include G_BEGIN_DECLS @@ -31,70 +28,54 @@ G_BEGIN_DECLS /** * SECTION: scanner * @section_id: scanner - * @title: Scanner classes + * @title: Scanner related classes * @include: arrow-dataset-glib/arrow-dataset-glib.h * - * #GADScanOptions is a class for a set of scan options. - * - * #GADScanTask is an abstract class for a scan task. + * #GADatasetScanner is a class for scanning dataset. * - * #GADInMemoryScanTask is a class for a scan task of record batches. + * #GADatasetScannerBuilder is a class for building a scanner. 
* - * Since: 1.0.0 + * Since: 5.0.0 */ -/* arrow::dataset::ScanOptions */ - -typedef struct GADScanOptionsPrivate_ { - std::shared_ptr scan_options; -} GADScanOptionsPrivate; +typedef struct GADatasetScannerPrivate_ { + std::shared_ptr scanner; +} GADatasetScannerPrivate; enum { - PROP_SCAN_OPTIONS = 1, - PROP_FILTER, - PROP_EVALUATOR, - PROP_PROJECTOR, - PROP_BATCH_SIZE, - PROP_USE_THREADS, + PROP_SCANNER = 1, }; -G_DEFINE_TYPE_WITH_PRIVATE(GADScanOptions, - gad_scan_options, +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanner, + gadataset_scanner, G_TYPE_OBJECT) -#define GAD_SCAN_OPTIONS_GET_PRIVATE(obj) \ - static_cast( \ - gad_scan_options_get_instance_private( \ - GAD_SCAN_OPTIONS(obj))) +#define GADATASET_SCANNER_GET_PRIVATE(obj) \ + static_cast( \ + gadataset_scanner_get_instance_private( \ + GADATASET_SCANNER(obj))) static void -gad_scan_options_finalize(GObject *object) +gadataset_scanner_finalize(GObject *object) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object); - - priv->scan_options.~shared_ptr(); - - G_OBJECT_CLASS(gad_scan_options_parent_class)->finalize(object); + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); + priv->scanner.~shared_ptr(); + G_OBJECT_CLASS(gadataset_scanner_parent_class)->finalize(object); } static void -gad_scan_options_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) +gadataset_scanner_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object); + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); switch (prop_id) { - case PROP_SCAN_OPTIONS: - priv->scan_options = - *static_cast *>(g_value_get_pointer(value)); - break; - case PROP_BATCH_SIZE: - priv->scan_options->batch_size = g_value_get_int64(value); - break; - case PROP_USE_THREADS: - priv->scan_options->use_threads = g_value_get_boolean(value); + case PROP_SCANNER: + priv->scanner = + *static_cast *>( + g_value_get_pointer(value)); break; default: 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -103,212 +84,91 @@ gad_scan_options_set_property(GObject *object, } static void -gad_scan_options_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) +gadataset_scanner_init(GADatasetScanner *object) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_BATCH_SIZE: - g_value_set_int64(value, priv->scan_options->batch_size); - break; - case PROP_USE_THREADS: - g_value_set_boolean(value, priv->scan_options->use_threads); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); + new(&priv->scanner) std::shared_ptr; } static void -gad_scan_options_init(GADScanOptions *object) +gadataset_scanner_class_init(GADatasetScannerClass *klass) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object); - new(&priv->scan_options) std::shared_ptr; -} + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_scanner_finalize; + gobject_class->set_property = gadataset_scanner_set_property; -static void -gad_scan_options_class_init(GADScanOptionsClass *klass) -{ - GObjectClass *gobject_class; GParamSpec *spec; - - gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->finalize = gad_scan_options_finalize; - gobject_class->set_property = gad_scan_options_set_property; - gobject_class->get_property = gad_scan_options_get_property; - - auto scan_options = std::make_shared(); - - spec = g_param_spec_pointer("scan-options", - "ScanOptions", - "The raw std::shared *", + spec = g_param_spec_pointer("scanner", + "Scanner", + "The raw std::shared *", static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_SCAN_OPTIONS, spec); - - // TODO: PROP_FILTER - // TODO: PROP_EVALUATOR - // TODO: PROP_PROJECTOR - - /** - * GADScanOptions:batch-size: - * - * Maximum row count for scanned batches. 
- * - * Since: 1.0.0 - */ - spec = g_param_spec_int64("batch-size", - "Batch size", - "Maximum row count for scanned batches", - 0, - G_MAXINT64, - scan_options->batch_size, - static_cast(G_PARAM_READWRITE)); - g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, spec); - - /** - * GADScanOptions:use-threads: - * - * Indicate if the Scanner should make use of a ThreadPool. - * - * Since: 4.0.0 - */ - spec = g_param_spec_boolean("use-threads", - "Use threads", - "Indicate if the Scanner should make use of a ThreadPool", - scan_options->use_threads, - static_cast(G_PARAM_READWRITE)); - g_object_class_install_property(gobject_class, PROP_USE_THREADS, spec); + g_object_class_install_property(gobject_class, PROP_SCANNER, spec); } /** - * gad_scan_options_new: - * @schema: A #GArrowSchema. - * - * Returns: A newly created #GADScanOptions. - * - * Since: 1.0.0 - */ -GADScanOptions * -gad_scan_options_new(GArrowSchema *schema) -{ - auto arrow_schema = garrow_schema_get_raw(schema); - auto arrow_scan_options = std::make_shared(); - arrow_scan_options->dataset_schema = arrow_schema; - return gad_scan_options_new_raw(&arrow_scan_options); -} - -/** - * gad_scan_options_get_schema: - * @scan_options: A #GADScanOptions. + * gadataset_scanner_to_table: + * @scanner: A #GADatasetScanner. + * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (transfer full): A #GArrowSchema. + * Returns: (transfer full) (nullable): + * A newly created #GArrowTable on success, %NULL on error. 
* - * Since: 1.0.0 + * Since: 5.0.0 */ -GArrowSchema * -gad_scan_options_get_schema(GADScanOptions *scan_options) +GArrowTable * +gadataset_scanner_to_table(GADatasetScanner *scanner, + GError **error) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options); - auto arrow_schema = priv->scan_options->dataset_schema; - return garrow_schema_new_raw(&arrow_schema); + auto arrow_scanner = gadataset_scanner_get_raw(scanner); + auto arrow_table_result = arrow_scanner->ToTable(); + if (garrow::check(error, arrow_table_result, "[scanner][to-table]")) { + auto arrow_table = *arrow_table_result; + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } } -/* arrow::dataset::ScanTask */ -typedef struct GADScanTaskPrivate_ { - std::shared_ptr scan_task; - GADScanOptions *options; - GADFragment *fragment; -} GADScanTaskPrivate; +typedef struct GADatasetScannerBuilderPrivate_ { + std::shared_ptr scanner_builder; +} GADatasetScannerBuilderPrivate; enum { - PROP_SCAN_TASK = 1, - PROP_OPTIONS, - PROP_FRAGMENT, + PROP_SCANNER_BUILDER = 1, }; -G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADScanTask, - gad_scan_task, - G_TYPE_OBJECT) - -#define GAD_SCAN_TASK_GET_PRIVATE(obj) \ - static_cast( \ - gad_scan_task_get_instance_private( \ - GAD_SCAN_TASK(obj))) - -static void -gad_scan_task_dispose(GObject *object) -{ - auto priv = GAD_SCAN_TASK_GET_PRIVATE(object); - - if (priv->options) { - g_object_unref(priv->options); - priv->options = NULL; - } - - if (priv->fragment) { - g_object_unref(priv->fragment); - priv->fragment = NULL; - } - - G_OBJECT_CLASS(gad_scan_task_parent_class)->dispose(object); -} - -static void -gad_scan_task_finalize(GObject *object) -{ - auto priv = GAD_SCAN_TASK_GET_PRIVATE(object); - - priv->scan_task.~shared_ptr(); +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScannerBuilder, + gadataset_scanner_builder, + G_TYPE_OBJECT) - G_OBJECT_CLASS(gad_scan_task_parent_class)->finalize(object); -} +#define GADATASET_SCANNER_BUILDER_GET_PRIVATE(obj) \ + static_cast( \ + 
gadataset_scanner_builder_get_instance_private( \ + GADATASET_SCANNER_BUILDER(obj))) static void -gad_scan_task_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) +gadataset_scanner_builder_finalize(GObject *object) { - auto priv = GAD_SCAN_TASK_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_SCAN_TASK: - priv->scan_task = - *static_cast *>(g_value_get_pointer(value)); - break; - case PROP_OPTIONS: - priv->options = GAD_SCAN_OPTIONS(g_value_dup_object(value)); - break; - case PROP_FRAGMENT: - priv->fragment = GAD_FRAGMENT(g_value_dup_object(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); + priv->scanner_builder.~shared_ptr(); + G_OBJECT_CLASS(gadataset_scanner_builder_parent_class)->finalize(object); } static void -gad_scan_task_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) +gadataset_scanner_builder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) { - auto priv = GAD_SCAN_TASK_GET_PRIVATE(object); + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); switch (prop_id) { - case PROP_OPTIONS: - g_value_set_object(value, priv->options); - break; - case PROP_FRAGMENT: - g_value_set_object(value, priv->fragment); + case PROP_SCANNER_BUILDER: + priv->scanner_builder = + *static_cast *>( + g_value_get_pointer(value)); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -317,206 +177,112 @@ gad_scan_task_get_property(GObject *object, } static void -gad_scan_task_init(GADScanTask *object) +gadataset_scanner_builder_init(GADatasetScannerBuilder *object) { - auto priv = GAD_SCAN_TASK_GET_PRIVATE(object); - new(&priv->scan_task) std::shared_ptr; + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); + new(&priv->scanner_builder) std::shared_ptr; } static void 
-gad_scan_task_class_init(GADScanTaskClass *klass) +gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass) { auto gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->dispose = gad_scan_task_dispose; - gobject_class->finalize = gad_scan_task_finalize; - gobject_class->set_property = gad_scan_task_set_property; - gobject_class->get_property = gad_scan_task_get_property; + gobject_class->finalize = gadataset_scanner_builder_finalize; + gobject_class->set_property = gadataset_scanner_builder_set_property; GParamSpec *spec; - spec = g_param_spec_pointer("scan-task", - "ScanTask", - "The raw std::shared *", + spec = g_param_spec_pointer("scanner-builder", + "Scanner builder", + "The raw " + "std::shared *", static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_SCAN_TASK, spec); - - /** - * GADScanTask:options: - * - * The options of the scan task. - * - * Since: 1.0.0 - */ - spec = g_param_spec_object("options", - "Options", - "The options of the scan task", - GAD_TYPE_SCAN_OPTIONS, - static_cast(G_PARAM_READWRITE | - G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_OPTIONS, spec); - - /** - * GADScanTask:fragment: - * - * The fragment of the scan task. - * - * Since: 4.0.0 - */ - spec = g_param_spec_object("fragment", - "Fragment", - "The fragment of the scan task", - GAD_TYPE_FRAGMENT, - static_cast(G_PARAM_READWRITE | - G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_FRAGMENT, spec); + g_object_class_install_property(gobject_class, PROP_SCANNER_BUILDER, spec); } /** - * gad_scan_task_get_options: - * @scan_task: A #GADScanTask. - * - * Returns: (transfer full): A #GADScanOptions. 
- * - * Since: 1.0.0 - */ -GADScanOptions * -gad_scan_task_get_options(GADScanTask *scan_task) -{ - auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task); - if (priv->options) { - g_object_ref(priv->options); - return priv->options; - } - - auto arrow_options = priv->scan_task->options(); - return gad_scan_options_new_raw(&arrow_options); -} - -/** - * gad_scan_task_get_fragment: - * @scan_task: A #GADFragment. + * gadataset_scanner_builder_new: + * @dataset: A #GADatasetDataset to be scanned. + * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (transfer full): A #GADFragment. + * Returns: (nullable): A newly created #GADatasetScannerBuilder on success, + * %NULL on error. * - * Since: 4.0.0 + * Since: 5.0.0 */ -GADFragment * -gad_scan_task_get_fragment(GADScanTask *scan_task) +GADatasetScannerBuilder * +gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error) { - auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task); - if (priv->fragment) { - g_object_ref(priv->fragment); - return priv->fragment; + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (garrow::check(error, + arrow_scanner_builder_result, + "[scanner-builder][new]")) { + auto arrow_scanner_builder = *arrow_scanner_builder_result; + return gadataset_scanner_builder_new_raw(&arrow_scanner_builder); + } else { + return NULL; } - - auto arrow_fragment = priv->scan_task->fragment(); - return gad_fragment_new_raw(&arrow_fragment); } /** - * gad_scan_task_execute: - * @scan_task: A #GADScanTask. + * gadataset_scanner_builder_finish: + * @builder: A #GADatasetScannerBuilder. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): A newly created #GArrowRecordBatchIterator, - * or %NULL on error. + * Returns: (transfer full) (nullable): + * A newly created #GADatasetScanner on success, %NULL on error. 
* - * Since: 1.0.0 + * Since: 5.0.0 */ -GArrowRecordBatchIterator *gad_scan_task_execute(GADScanTask *scan_task, - GError **error) +GADatasetScanner * +gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, + GError **error) { - auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task); - auto arrow_result = priv->scan_task->Execute(); - if (garrow::check(error, arrow_result, "[datasets][scan-task][execute]")) { - auto arrow_record_batch_iteraor = std::move(*arrow_result); - return garrow_record_batch_iterator_new_raw(&arrow_record_batch_iteraor); + auto arrow_builder = gadataset_scanner_builder_get_raw(builder); + auto arrow_scanner_result = arrow_builder->Finish(); + if (garrow::check(error, arrow_scanner_result, "[scanner-builder][finish]")) { + auto arrow_scanner = *arrow_scanner_result; + return gadataset_scanner_new_raw(&arrow_scanner); } else { return NULL; } } -/* arrow::dataset::InMemoryScanTask */ - -G_DEFINE_TYPE(GADInMemoryScanTask, - gad_in_memory_scan_task, - GAD_TYPE_SCAN_TASK) - -static void -gad_in_memory_scan_task_init(GADInMemoryScanTask *object) -{ -} -static void -gad_in_memory_scan_task_class_init(GADInMemoryScanTaskClass *klass) -{ -} +G_END_DECLS -/** - * gad_in_memory_scan_task_new: - * @record_batches: (array length=n_record_batches): - * (element-type GArrowRecordBatch): The record batches of the table. - * @n_record_batches: The number of record batches. - * @options: A #GADScanOptions. - * @fragment: A #GADInMemoryFragment. - * - * Returns: A newly created #GADInMemoryScanTask. 
- * - * Since: 1.0.0 - */ -GADInMemoryScanTask * -gad_in_memory_scan_task_new(GArrowRecordBatch **record_batches, - gsize n_record_batches, - GADScanOptions *options, - GADInMemoryFragment *fragment) +GADatasetScanner * +gadataset_scanner_new_raw( + std::shared_ptr *arrow_scanner) { - std::vector> arrow_record_batches; - arrow_record_batches.reserve(n_record_batches); - for (gsize i = 0; i < n_record_batches; ++i) { - auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]); - arrow_record_batches.push_back(arrow_record_batch); - } - auto arrow_options = gad_scan_options_get_raw(options); - auto arrow_fragment = gad_fragment_get_raw(GAD_FRAGMENT(fragment)); - auto arrow_in_memory_scan_task = - std::make_shared(arrow_record_batches, - arrow_options, - arrow_fragment); - return gad_in_memory_scan_task_new_raw(&arrow_in_memory_scan_task, - options, - fragment); + auto scanner = + GADATASET_SCANNER(g_object_new(GADATASET_TYPE_SCANNER, + "scanner", arrow_scanner, + NULL)); + return scanner; } -G_END_DECLS - -GADScanOptions * -gad_scan_options_new_raw(std::shared_ptr *arrow_scan_options) +std::shared_ptr +gadataset_scanner_get_raw(GADatasetScanner *scanner) { - auto scan_options = - GAD_SCAN_OPTIONS(g_object_new(GAD_TYPE_SCAN_OPTIONS, - "scan-options", arrow_scan_options, - NULL)); - return scan_options; + auto priv = GADATASET_SCANNER_GET_PRIVATE(scanner); + return priv->scanner; } -std::shared_ptr -gad_scan_options_get_raw(GADScanOptions *scan_options) +GADatasetScannerBuilder * +gadataset_scanner_builder_new_raw( + std::shared_ptr *arrow_scanner_builder) { - auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options); - return priv->scan_options; + return GADATASET_SCANNER_BUILDER( + g_object_new(GADATASET_TYPE_SCANNER_BUILDER, + "scanner-builder", arrow_scanner_builder, + NULL)); } -GADInMemoryScanTask * -gad_in_memory_scan_task_new_raw(std::shared_ptr *arrow_in_memory_scan_task, - GADScanOptions *options, - GADInMemoryFragment *fragment) +std::shared_ptr 
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder) { - auto in_memory_scan_task = - GAD_IN_MEMORY_SCAN_TASK(g_object_new(GAD_TYPE_IN_MEMORY_SCAN_TASK, - "scan-task", arrow_in_memory_scan_task, - "options", options, - "fragment", fragment, - NULL)); - return in_memory_scan_task; + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(scanner_builder); + return priv->scanner_builder; } diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h index f387e8948f2..446815d6db1 100644 --- a/c_glib/arrow-dataset-glib/scanner.h +++ b/c_glib/arrow-dataset-glib/scanner.h @@ -19,70 +19,45 @@ #pragma once -#include - +#include #include G_BEGIN_DECLS -/* arrow::dataset::ScanOptions */ - -#define GAD_TYPE_SCAN_OPTIONS (gad_scan_options_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADScanOptions, - gad_scan_options, - GAD, - SCAN_OPTIONS, +#define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, + gadataset_scanner, + GADATASET, + SCANNER, GObject) -struct _GADScanOptionsClass +struct _GADatasetScannerClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_5_0 +GArrowTable * +gadataset_scanner_to_table(GADatasetScanner *scanner, + GError **error); -GARROW_AVAILABLE_IN_1_0 -GADScanOptions *gad_scan_options_new(GArrowSchema *schema); -GARROW_AVAILABLE_IN_1_0 -GArrowSchema *gad_scan_options_get_schema(GADScanOptions *scan_options); - -/* arrow::dataset::ScanTask */ - -#define GAD_TYPE_SCAN_TASK (gad_scan_task_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADScanTask, - gad_scan_task, - GAD, - SCAN_TASK, +#define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetScannerBuilder, + gadataset_scanner_builder, + GADATASET, + SCANNER_BUILDER, GObject) -struct _GADScanTaskClass +struct _GADatasetScannerBuilderClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_1_0 -GADScanOptions *gad_scan_task_get_options(GADScanTask *scan_task); 
-GARROW_AVAILABLE_IN_4_0 -GADFragment *gad_scan_task_get_fragment(GADScanTask *scan_task); -GARROW_AVAILABLE_IN_1_0 -GArrowRecordBatchIterator *gad_scan_task_execute(GADScanTask *scan_task, - GError **error); - -/* arrow::dataset::InMemoryScanTask */ - -#define GAD_TYPE_IN_MEMORY_SCAN_TASK (gad_in_memory_scan_task_get_type()) -G_DECLARE_DERIVABLE_TYPE(GADInMemoryScanTask, - gad_in_memory_scan_task, - GAD, - IN_MEMORY_SCAN_TASK, - GADScanTask) -struct _GADInMemoryScanTaskClass -{ - GADScanTaskClass parent_class; -}; - -GARROW_AVAILABLE_IN_1_0 -GADInMemoryScanTask * -gad_in_memory_scan_task_new(GArrowRecordBatch **record_batches, - gsize n_record_batches, - GADScanOptions *options, - GADInMemoryFragment *fragment); +GARROW_AVAILABLE_IN_5_0 +GADatasetScannerBuilder * +gadataset_scanner_builder_new(GADatasetDataset *dataset, + GError **error); +GARROW_AVAILABLE_IN_5_0 +GADatasetScanner * +gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, + GError **error); G_END_DECLS diff --git a/c_glib/arrow-dataset-glib/scanner.hpp b/c_glib/arrow-dataset-glib/scanner.hpp index f10351ee99b..663ab6fc44b 100644 --- a/c_glib/arrow-dataset-glib/scanner.hpp +++ b/c_glib/arrow-dataset-glib/scanner.hpp @@ -24,12 +24,14 @@ #include #include -GADScanOptions * -gad_scan_options_new_raw(std::shared_ptr *arrow_scan_options); -std::shared_ptr -gad_scan_options_get_raw(GADScanOptions *scan_options); +GADatasetScanner * +gadataset_scanner_new_raw( + std::shared_ptr *arrow_scanner); +std::shared_ptr +gadataset_scanner_get_raw(GADatasetScanner *scanner); -GADInMemoryScanTask * -gad_in_memory_scan_task_new_raw(std::shared_ptr *arrow_in_memory_scan_task, - GADScanOptions *scan_options, - GADInMemoryFragment *fragment); +GADatasetScannerBuilder * +gadataset_scanner_builder_new_raw( + std::shared_ptr *arrow_scanner_builder); +std::shared_ptr +gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder); diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.h 
b/c_glib/arrow-flight-glib/arrow-flight-glib.h new file mode 100644 index 00000000000..6fc8f43d840 --- /dev/null +++ b/c_glib/arrow-flight-glib/arrow-flight-glib.h @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.hpp b/c_glib/arrow-flight-glib/arrow-flight-glib.hpp new file mode 100644 index 00000000000..11e1fe94d52 --- /dev/null +++ b/c_glib/arrow-flight-glib/arrow-flight-glib.hpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp new file mode 100644 index 00000000000..7610fc98570 --- /dev/null +++ b/c_glib/arrow-flight-glib/client.cpp @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: client + * @section_id: client + * @title: Client related classes + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightStreamReader is a class for reading record batches from a + * server. + * + * #GAFlightCallOptions is a class for options of each call. + * + * #GAFlightClientOptions is a class for options of each client. + * + * #GAFlightClient is a class for Apache Arrow Flight client. 
+ * + * Since: 5.0.0 + */ + +G_DEFINE_TYPE(GAFlightStreamReader, + gaflight_stream_reader, + GAFLIGHT_TYPE_RECORD_BATCH_READER) + +static void +gaflight_stream_reader_init(GAFlightStreamReader *object) +{ +} + +static void +gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass) +{ +} + +typedef struct GAFlightCallOptionsPrivate_ { + arrow::flight::FlightCallOptions options; +} GAFlightCallOptionsPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCallOptions, + gaflight_call_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_call_options_get_instance_private( \ + GAFLIGHT_CALL_OPTIONS(obj))) + +static void +gaflight_call_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + + priv->options.~FlightCallOptions(); + + G_OBJECT_CLASS(gaflight_call_options_parent_class)->finalize(object); +} + +static void +gaflight_call_options_init(GAFlightCallOptions *object) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + new(&priv->options) arrow::flight::FlightCallOptions; +} + +static void +gaflight_call_options_class_init(GAFlightCallOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_call_options_finalize; +} + +/** + * gaflight_call_options_new: + * + * Returns: The newly created options for a call. 
+ * + * Since: 5.0.0 + */ +GAFlightCallOptions * +gaflight_call_options_new(void) +{ + return static_cast( + g_object_new(GAFLIGHT_TYPE_CALL_OPTIONS, NULL)); +} + + +typedef struct GAFlightClientOptionsPrivate_ { + arrow::flight::FlightClientOptions options; +} GAFlightClientOptionsPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClientOptions, + gaflight_client_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_client_options_get_instance_private( \ + GAFLIGHT_CLIENT_OPTIONS(obj))) + +static void +gaflight_client_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object); + + priv->options.~FlightClientOptions(); + + G_OBJECT_CLASS(gaflight_client_options_parent_class)->finalize(object); +} + +static void +gaflight_client_options_init(GAFlightClientOptions *object) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object); + new(&(priv->options)) arrow::flight::FlightClientOptions; + priv->options = arrow::flight::FlightClientOptions::Defaults(); +} + +static void +gaflight_client_options_class_init(GAFlightClientOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_client_options_finalize; +} + +/** + * gaflight_client_options_new: + * + * Returns: The newly created options for a client. 
+ * + * Since: 5.0.0 + */ +GAFlightClientOptions * +gaflight_client_options_new(void) +{ + return static_cast( + g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL)); +} + + +typedef struct GAFlightClientPrivate_ { + arrow::flight::FlightClient *client; +} GAFlightClientPrivate; + +enum { + PROP_CLIENT = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClient, + gaflight_client, + G_TYPE_OBJECT) + +#define GAFLIGHT_CLIENT_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_client_get_instance_private( \ + GAFLIGHT_CLIENT(obj))) + +static void +gaflight_client_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object); + + delete priv->client; + + G_OBJECT_CLASS(gaflight_client_parent_class)->finalize(object); +} + +static void +gaflight_client_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + priv->client = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_client_init(GAFlightClient *object) +{ +} + +static void +gaflight_client_class_init(GAFlightClientClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_client_finalize; + gobject_class->set_property = gaflight_client_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("client", + "Client", + "The raw arrow::flight::FlightClient *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CLIENT, spec); +} + +/** + * gaflight_client_new: + * @location: A #GAFlightLocation to be connected. + * @options: (nullable): A #GAFlightClientOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): The newly created client, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GAFlightClient * +gaflight_client_new(GAFlightLocation *location, + GAFlightClientOptions *options, + GError **error) +{ + const auto flight_location = gaflight_location_get_raw(location); + std::unique_ptr flight_client; + arrow::Status status; + if (options) { + const auto flight_options = gaflight_client_options_get_raw(options); + status = arrow::flight::FlightClient::Connect(*flight_location, + *flight_options, + &flight_client); + } else { + status = arrow::flight::FlightClient::Connect(*flight_location, + &flight_client); + } + if (garrow::check(error, status, "[flight-client][new]")) { + return gaflight_client_new_raw(flight_client.release()); + } else { + return NULL; + } +} + +/** + * gaflight_client_list_flights: + * @client: A #GAFlightClient. + * @criteria: (nullable): A #GAFlightCriteria. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (element-type GAFlightInfo) (transfer full): + * The returned list of #GAFlightInfo on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GList * +gaflight_client_list_flights(GAFlightClient *client, + GAFlightCriteria *criteria, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + arrow::flight::Criteria flight_default_criteria; + auto flight_criteria = &flight_default_criteria; + if (criteria) { + flight_criteria = gaflight_criteria_get_raw(criteria); + } + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + std::unique_ptr flight_listing; + auto status = flight_client->ListFlights(*flight_options, + *flight_criteria, + &flight_listing); + if (!garrow::check(error, + status, + "[flight-client][list-flights]")) { + return NULL; + } + GList *listing = NULL; + std::unique_ptr flight_info; + while (true) { + status = flight_listing->Next(&flight_info); + if (!garrow::check(error, + status, + "[flight-client][list-flights]")) { + g_list_free_full(listing, g_object_unref); + return NULL; + } + if (!flight_info) { + break; + } + auto info = gaflight_info_new_raw(flight_info.release()); + listing = g_list_prepend(listing, info); + } + return g_list_reverse(listing); +} + +/** + * gaflight_client_do_get: + * @client: A #GAFlightClient. + * @ticket: A #GAFlightTicket. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * The #GAFlightStreamReader to read record batched from the server + * on success, %NULL on error. 
+ * + * Since: 6.0.0 + */ +GAFlightStreamReader * +gaflight_client_do_get(GAFlightClient *client, + GAFlightTicket *ticket, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + const auto flight_ticket = gaflight_ticket_get_raw(ticket); + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + std::unique_ptr flight_reader; + auto status = flight_client->DoGet(*flight_options, + *flight_ticket, + &flight_reader); + if (garrow::check(error, + status, + "[flight-client][do-get]")) { + return gaflight_stream_reader_new_raw(flight_reader.release()); + } else { + return NULL; + } +} + + +G_END_DECLS + + +GAFlightStreamReader * +gaflight_stream_reader_new_raw( + arrow::flight::FlightStreamReader *flight_reader) +{ + return GAFLIGHT_STREAM_READER( + g_object_new(GAFLIGHT_TYPE_STREAM_READER, + "reader", flight_reader, + NULL)); +} + +arrow::flight::FlightCallOptions * +gaflight_call_options_get_raw(GAFlightCallOptions *options) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +arrow::flight::FlightClientOptions * +gaflight_client_options_get_raw(GAFlightClientOptions *options) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +arrow::flight::FlightClient * +gaflight_client_get_raw(GAFlightClient *client) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(client); + return priv->client; +} + +GAFlightClient * +gaflight_client_new_raw(arrow::flight::FlightClient *flight_client) +{ + return GAFLIGHT_CLIENT(g_object_new(GAFLIGHT_TYPE_CLIENT, + "client", flight_client, + NULL)); +} diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h new file mode 100644 index 00000000000..bc297116135 --- /dev/null +++ b/c_glib/arrow-flight-glib/client.h @@ -0,0 +1,104 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_STREAM_READER \ + (gaflight_stream_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader, + gaflight_stream_reader, + GAFLIGHT, + STREAM_READER, + GAFlightRecordBatchReader) +struct _GAFlightStreamReaderClass +{ + GAFlightRecordBatchReaderClass parent_class; +}; + + +#define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCallOptions, + gaflight_call_options, + GAFLIGHT, + CALL_OPTIONS, + GObject) +struct _GAFlightCallOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCallOptions * +gaflight_call_options_new(void); + + +#define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightClientOptions, + gaflight_client_options, + GAFLIGHT, + CLIENT_OPTIONS, + GObject) +struct _GAFlightClientOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightClientOptions * +gaflight_client_options_new(void); + + +#define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightClient, + gaflight_client, + GAFLIGHT, + CLIENT, + GObject) 
+struct _GAFlightClientClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightClient * +gaflight_client_new(GAFlightLocation *location, + GAFlightClientOptions *options, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_client_list_flights(GAFlightClient *client, + GAFlightCriteria *criteria, + GAFlightCallOptions *options, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GAFlightStreamReader * +gaflight_client_do_get(GAFlightClient *client, + GAFlightTicket *ticket, + GAFlightCallOptions *options, + GError **error); + + +G_END_DECLS diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp new file mode 100644 index 00000000000..1e68761b7ee --- /dev/null +++ b/c_glib/arrow-flight-glib/client.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include + +#include + + +GAFlightStreamReader * +gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader); + +arrow::flight::FlightCallOptions * +gaflight_call_options_get_raw(GAFlightCallOptions *options); + +arrow::flight::FlightClientOptions * +gaflight_client_options_get_raw(GAFlightClientOptions *options); + +arrow::flight::FlightClient * +gaflight_client_get_raw(GAFlightClient *client); +GAFlightClient * +gaflight_client_new_raw(arrow::flight::FlightClient *flight_client); diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp new file mode 100644 index 00000000000..81b00f7a369 --- /dev/null +++ b/c_glib/arrow-flight-glib/common.cpp @@ -0,0 +1,1467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include + +G_BEGIN_DECLS + +/** + * SECTION: common + * @section_id: common + * @title: Classes both for client and server + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightCriteria is a class for criteria. + * + * #GAFlightLocation is a class for location. + * + * #GAFlightDescriptor is a base class for all descriptor classes such + * as #GAFlightPathDescriptor. 
+ * + * #GAFlightPathDescriptor is a class for path descriptor. + * + * #GAFlightCommandDescriptor is a class for command descriptor. + * + * #GAFlightTicket is a class for ticket. + * + * #GAFlightEndpoint is a class for endpoint. + * + * #GAFlightInfo is a class for flight information. + * + * #GAFlightStreamChunk is a class for a chunk in stream. + * + * #GAFlightRecordBatchReader is a class for reading record batches. + * + * Since: 5.0.0 + */ + +typedef struct GAFlightCriteriaPrivate_ { + arrow::flight::Criteria criteria; + GBytes *expression; +} GAFlightCriteriaPrivate; + +enum { + PROP_EXPRESSION = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCriteria, + gaflight_criteria, + G_TYPE_OBJECT) + +#define GAFLIGHT_CRITERIA_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_criteria_get_instance_private( \ + GAFLIGHT_CRITERIA(obj))) + +static void +gaflight_criteria_dispose(GObject *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + if (priv->expression) { + g_bytes_unref(priv->expression); + priv->expression = NULL; + } + + G_OBJECT_CLASS(gaflight_criteria_parent_class)->dispose(object); +} + +static void +gaflight_criteria_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + priv->criteria.~Criteria(); + + G_OBJECT_CLASS(gaflight_criteria_parent_class)->finalize(object); +} + +static void +gaflight_criteria_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + if (priv->expression) { + g_bytes_unref(priv->expression); + } + priv->expression = static_cast(g_value_dup_boxed(value)); + { + gsize size; + auto data = g_bytes_get_data(priv->expression, &size); + priv->criteria.expression.assign(static_cast(data), + size); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_criteria_get_property(GObject *object, 
+ guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + g_value_set_boxed(value, priv->expression); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_criteria_init(GAFlightCriteria *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + new(&priv->criteria) arrow::flight::Criteria; +} + +static void +gaflight_criteria_class_init(GAFlightCriteriaClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_criteria_dispose; + gobject_class->finalize = gaflight_criteria_finalize; + gobject_class->set_property = gaflight_criteria_set_property; + gobject_class->get_property = gaflight_criteria_get_property; + + GParamSpec *spec; + /** + * GAFlightCriteria:expression: + * + * Opaque criteria expression, dependent on server implementation. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boxed("expression", + "Expression", + "Opaque criteria expression, " + "dependent on server implementation", + G_TYPE_BYTES, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec); +} + +/** + * gaflight_criteria_new: + * @expression: A #GBytes. + * + * Returns: The newly created #GAFlightCriteria, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GAFlightCriteria * +gaflight_criteria_new(GBytes *expression) +{ + return GAFLIGHT_CRITERIA( + g_object_new(GAFLIGHT_TYPE_CRITERIA, + "expression", expression, + NULL)); +} + + +typedef struct GAFlightLocationPrivate_ { + arrow::flight::Location location; +} GAFlightLocationPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightLocation, + gaflight_location, + G_TYPE_OBJECT) + +#define GAFLIGHT_LOCATION_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_location_get_instance_private( \ + GAFLIGHT_LOCATION(obj))) + +static void +gaflight_location_finalize(GObject *object) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object); + + priv->location.~Location(); + + G_OBJECT_CLASS(gaflight_location_parent_class)->finalize(object); +} + +static void +gaflight_location_init(GAFlightLocation *object) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object); + new(&priv->location) arrow::flight::Location; +} + +static void +gaflight_location_class_init(GAFlightLocationClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_location_finalize; +} + +/** + * gaflight_location_new: + * @uri: An URI to specify location. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): The newly created location, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightLocation * +gaflight_location_new(const gchar *uri, + GError **error) +{ + auto location = GAFLIGHT_LOCATION(g_object_new(GAFLIGHT_TYPE_LOCATION, NULL)); + auto flight_location = gaflight_location_get_raw(location); + if (garrow::check(error, + arrow::flight::Location::Parse(uri, flight_location), + "[flight-location][new]")) { + return location; + } else { + g_object_unref(location); + return NULL; + } +} + +/** + * gaflight_location_to_string: + * @location: A #GAFlightLocation. + * + * Returns: A representation of this URI as a string. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 5.0.0 + */ +gchar * +gaflight_location_to_string(GAFlightLocation *location) +{ + const auto flight_location = gaflight_location_get_raw(location); + return g_strdup(flight_location->ToString().c_str()); +} + +/** + * gaflight_location_get_scheme: + * @location: A #GAFlightLocation. + * + * Returns: The scheme of this URI. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 5.0.0 + */ +gchar * +gaflight_location_get_scheme(GAFlightLocation *location) +{ + const auto flight_location = gaflight_location_get_raw(location); + return g_strdup(flight_location->scheme().c_str()); +} + +/** + * gaflight_location_equal: + * @location: A #GAFlightLocation. + * @other_location: A #GAFlightLocation to be compared. + * + * Returns: %TRUE if both of them represents the same URI, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gaflight_location_equal(GAFlightLocation *location, + GAFlightLocation *other_location) +{ + const auto flight_location = gaflight_location_get_raw(location); + const auto flight_other_location = gaflight_location_get_raw(other_location); + return flight_location->Equals(*flight_other_location); +} + + +typedef struct GAFlightDescriptorPrivate_ { + arrow::flight::FlightDescriptor descriptor; +} GAFlightDescriptorPrivate; + +enum { + PROP_DESCRIPTOR = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightDescriptor, + gaflight_descriptor, + G_TYPE_OBJECT) + +#define GAFLIGHT_DESCRIPTOR_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_descriptor_get_instance_private( \ + GAFLIGHT_DESCRIPTOR(obj))) + +static void +gaflight_descriptor_finalize(GObject *object) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object); + + priv->descriptor.~FlightDescriptor(); + + G_OBJECT_CLASS(gaflight_descriptor_parent_class)->finalize(object); +} + +static void +gaflight_descriptor_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object); + 
+  switch (prop_id) {
+  case PROP_DESCRIPTOR:
+    priv->descriptor = *static_cast<arrow::flight::FlightDescriptor *>(
+      g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_descriptor_init(GAFlightDescriptor *object)
+{
+  auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object);
+  new(&priv->descriptor) arrow::flight::FlightDescriptor;
+}
+
+static void
+gaflight_descriptor_class_init(GAFlightDescriptorClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_descriptor_finalize;
+  gobject_class->set_property = gaflight_descriptor_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("descriptor",
+                              "Descriptor",
+                              "The raw arrow::flight::FlightDescriptor",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  /* Must register under this class's own property ID (PROP_DESCRIPTOR),
+   * not GAFlightCriteria's PROP_EXPRESSION: both happen to equal 1 today,
+   * so the copy-paste only worked by coincidence. */
+  g_object_class_install_property(gobject_class, PROP_DESCRIPTOR, spec);
+}
+
+/**
+ * gaflight_descriptor_to_string:
+ * @descriptor: A #GAFlightDescriptor.
+ *
+ * Returns: A descriptor as a string.
+ *
+ * It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_descriptor_to_string(GAFlightDescriptor *descriptor)
+{
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  return g_strdup(flight_descriptor->ToString().c_str());
+}
+
+/**
+ * gaflight_descriptor_equal:
+ * @descriptor: A #GAFlightDescriptor.
+ * @other_descriptor: A #GAFlightDescriptor to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same descriptor,
+ *   %FALSE otherwise.
+ * + * Since: 5.0.0 + */ +gboolean +gaflight_descriptor_equal(GAFlightDescriptor *descriptor, + GAFlightDescriptor *other_descriptor) +{ + const auto flight_descriptor = + gaflight_descriptor_get_raw(descriptor); + const auto flight_other_descriptor = + gaflight_descriptor_get_raw(other_descriptor); + return flight_descriptor->Equals(*flight_other_descriptor); +} + + +G_DEFINE_TYPE(GAFlightPathDescriptor, + gaflight_path_descriptor, + GAFLIGHT_TYPE_DESCRIPTOR) + +static void +gaflight_path_descriptor_init(GAFlightPathDescriptor *object) +{ +} + +static void +gaflight_path_descriptor_class_init(GAFlightPathDescriptorClass *klass) +{ +} + +/** + * gaflight_path_descriptor_new: + * @paths: (array length=n_paths): List of paths identifying a + * particular dataset. + * @n_paths: The number of @paths. + * + * Returns: The newly created #GAFlightPathDescriptor. + * + * Since: 5.0.0 + */ +GAFlightPathDescriptor * +gaflight_path_descriptor_new(const gchar **paths, + gsize n_paths) +{ + std::vector flight_paths; + for (gsize i = 0; i < n_paths; i++) { + flight_paths.push_back(paths[i]); + } + auto flight_descriptor = arrow::flight::FlightDescriptor::Path(flight_paths); + return GAFLIGHT_PATH_DESCRIPTOR( + gaflight_descriptor_new_raw(&flight_descriptor)); +} + +/** + * gaflight_path_descriptor_get_paths: + * @descriptor: A #GAFlightPathDescriptor. + * + * Returns: (nullable) (array zero-terminated=1) (transfer full): + * The paths in this descriptor. + * + * It must be freed with g_strfreev() when no longer needed. 
+ * + * Since: 5.0.0 + */ +gchar ** +gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor) +{ + const auto flight_descriptor = + gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor)); + const auto &flight_paths = flight_descriptor->path; + if (flight_paths.empty()) { + return NULL; + } else { + auto paths = g_new(gchar *, flight_paths.size() + 1); + gsize i = 0; + for (const auto &flight_path : flight_paths) { + paths[i++] = g_strdup(flight_path.c_str()); + } + paths[i] = NULL; + return paths; + } +} + + +G_DEFINE_TYPE(GAFlightCommandDescriptor, + gaflight_command_descriptor, + GAFLIGHT_TYPE_DESCRIPTOR) + +static void +gaflight_command_descriptor_init(GAFlightCommandDescriptor *object) +{ +} + +static void +gaflight_command_descriptor_class_init(GAFlightCommandDescriptorClass *klass) +{ +} + +/** + * gaflight_command_descriptor_new: + * @command: Opaque value used to express a command. + * + * Returns: The newly created #GAFlightCommandDescriptor. + * + * Since: 5.0.0 + */ +GAFlightCommandDescriptor * +gaflight_command_descriptor_new(const gchar *command) +{ + auto flight_descriptor = arrow::flight::FlightDescriptor::Command(command); + return GAFLIGHT_COMMAND_DESCRIPTOR( + gaflight_descriptor_new_raw(&flight_descriptor)); +} + +/** + * gaflight_command_descriptor_get_command: + * @descriptor: A #GAFlightCommandDescriptor. + * + * Returns: The opaque value used to express a command. + * + * It should be freed with g_free() when no longer needed. 
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor)
+{
+  const auto flight_descriptor =
+    gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor));
+  const auto &flight_command = flight_descriptor->cmd;
+  return g_strdup(flight_command.c_str());
+}
+
+
+typedef struct GAFlightTicketPrivate_ {
+  arrow::flight::Ticket ticket;
+  GBytes *data;
+} GAFlightTicketPrivate;
+
+enum {
+  PROP_DATA = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightTicket,
+                           gaflight_ticket,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_TICKET_GET_PRIVATE(obj)     \
+  static_cast<GAFlightTicketPrivate *>(      \
+    gaflight_ticket_get_instance_private(    \
+      GAFLIGHT_TICKET(obj)))
+
+static void
+gaflight_ticket_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  if (priv->data) {
+    g_bytes_unref(priv->data);
+    priv->data = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_ticket_parent_class)->dispose(object);
+}
+
+static void
+gaflight_ticket_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  priv->ticket.~Ticket();
+
+  G_OBJECT_CLASS(gaflight_ticket_parent_class)->finalize(object);
+}
+
+static void
+gaflight_ticket_set_property(GObject *object,
+                             guint prop_id,
+                             const GValue *value,
+                             GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATA:
+    if (priv->data) {
+      g_bytes_unref(priv->data);
+    }
+    priv->data = static_cast<GBytes *>(g_value_dup_boxed(value));
+    {
+      gsize size;
+      auto data = g_bytes_get_data(priv->data, &size);
+      priv->ticket.ticket.assign(static_cast<const gchar *>(data),
+                                 size);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_ticket_get_property(GObject *object,
+                             guint prop_id,
+                             GValue *value,
+                             GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATA:
+    g_value_set_boxed(value, priv->data);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_ticket_init(GAFlightTicket *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+  new(&priv->ticket) arrow::flight::Ticket;
+}
+
+static void
+gaflight_ticket_class_init(GAFlightTicketClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_ticket_dispose;
+  gobject_class->finalize = gaflight_ticket_finalize;
+  gobject_class->set_property = gaflight_ticket_set_property;
+  gobject_class->get_property = gaflight_ticket_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightTicket:data:
+   *
+   * Opaque identifier or credential to use when requesting a data
+   * stream with the DoGet RPC.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_boxed("data",
+                            "Data",
+                            "Opaque identifier or credential to use "
+                            "when requesting a data stream with the DoGet RPC",
+                            G_TYPE_BYTES,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_DATA, spec);
+}
+
+/**
+ * gaflight_ticket_new:
+ * @data: A #GBytes.
+ *
+ * Returns: The newly created #GAFlightTicket, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightTicket *
+gaflight_ticket_new(GBytes *data)
+{
+  return GAFLIGHT_TICKET(
+    g_object_new(GAFLIGHT_TYPE_TICKET,
+                 "data", data,
+                 NULL));
+}
+
+/**
+ * gaflight_ticket_equal:
+ * @ticket: A #GAFlightTicket.
+ * @other_ticket: A #GAFlightTicket to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same ticket, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_ticket_equal(GAFlightTicket *ticket,
+                      GAFlightTicket *other_ticket)
+{
+  const auto flight_ticket = gaflight_ticket_get_raw(ticket);
+  const auto flight_other_ticket = gaflight_ticket_get_raw(other_ticket);
+  return flight_ticket->Equals(*flight_other_ticket);
+}
+
+
+typedef struct GAFlightEndpointPrivate_ {
+  arrow::flight::FlightEndpoint endpoint;
+  GAFlightTicket *ticket;
+  GList *locations;
+} GAFlightEndpointPrivate;
+
+enum {
+  PROP_TICKET = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightEndpoint,
+                           gaflight_endpoint,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_ENDPOINT_GET_PRIVATE(obj)     \
+  static_cast<GAFlightEndpointPrivate *>(      \
+    gaflight_endpoint_get_instance_private(    \
+      GAFLIGHT_ENDPOINT(obj)))
+
+static void
+gaflight_endpoint_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  if (priv->ticket) {
+    g_object_unref(priv->ticket);
+    priv->ticket = NULL;
+  }
+
+  if (priv->locations) {
+    g_list_free_full(priv->locations, g_object_unref);
+    priv->locations = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_endpoint_parent_class)->dispose(object);
+}
+
+static void
+gaflight_endpoint_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  priv->endpoint.~FlightEndpoint();
+
+  G_OBJECT_CLASS(gaflight_endpoint_parent_class)->finalize(object);
+}
+
+static void
+gaflight_endpoint_get_property(GObject *object,
+                               guint prop_id,
+                               GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_TICKET:
+    g_value_set_object(value, priv->ticket);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_endpoint_init(GAFlightEndpoint *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+  new(&priv->endpoint) arrow::flight::FlightEndpoint;
+}
+
+static void
+gaflight_endpoint_class_init(GAFlightEndpointClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_endpoint_dispose;
+  gobject_class->finalize = gaflight_endpoint_finalize;
+  gobject_class->get_property = gaflight_endpoint_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightEndpoint:ticket:
+   *
+   * Opaque ticket identify; use with DoGet RPC.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("ticket",
+                             "Ticket",
+                             "Opaque ticket identify; use with DoGet RPC",
+                             GAFLIGHT_TYPE_TICKET,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_TICKET, spec);
+}
+
+/**
+ * gaflight_endpoint_new:
+ * @ticket: A #GAFlightTicket.
+ * @locations: (element-type GAFlightLocation): A list of #GAFlightLocation.
+ *
+ * Returns: The newly created #GAFlightEndpoint, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightEndpoint *
+gaflight_endpoint_new(GAFlightTicket *ticket,
+                      GList *locations)
+{
+  auto endpoint = gaflight_endpoint_new_raw(nullptr, ticket);
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint);
+  for (auto node = locations; node; node = node->next) {
+    auto location = GAFLIGHT_LOCATION(node->data);
+    priv->endpoint.locations.push_back(*gaflight_location_get_raw(location));
+  }
+  return endpoint;
+}
+
+/**
+ * gaflight_endpoint_equal:
+ * @endpoint: A #GAFlightEndpoint.
+ * @other_endpoint: A #GAFlightEndpoint to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same endpoint,
+ *   %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_endpoint_equal(GAFlightEndpoint *endpoint,
+                        GAFlightEndpoint *other_endpoint)
+{
+  const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint);
+  const auto flight_other_endpoint = gaflight_endpoint_get_raw(other_endpoint);
+  return flight_endpoint->Equals(*flight_other_endpoint);
+}
+
+/**
+ * gaflight_endpoint_get_locations:
+ * @endpoint: A #GAFlightEndpoint.
+ *
+ * Returns: (nullable) (element-type GAFlightLocation) (transfer full):
+ *   The locations in this endpoint.
+ *
+ * It must be freed with g_list_free() and g_object_unref() when no
+ * longer needed. You can use `g_list_free_full(locations,
+ * g_object_unref)`.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint)
+{
+  const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint);
+  GList *locations = NULL;
+  for (const auto &flight_location : flight_endpoint->locations) {
+    auto location = gaflight_location_new(flight_location.ToString().c_str(),
+                                          nullptr);
+    locations = g_list_prepend(locations, location);
+  }
+  return g_list_reverse(locations);
+}
+
+
+typedef struct GAFlightInfoPrivate_ {
+  arrow::flight::FlightInfo info;
+} GAFlightInfoPrivate;
+
+enum {
+  PROP_INFO = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightInfo,
+                           gaflight_info,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_INFO_GET_PRIVATE(obj)     \
+  static_cast<GAFlightInfoPrivate *>(      \
+    gaflight_info_get_instance_private(    \
+      GAFLIGHT_INFO(obj)))
+
+static void
+gaflight_info_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_INFO_GET_PRIVATE(object);
+
+  priv->info.~FlightInfo();
+
+  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
+}
+
+static void
+gaflight_info_set_property(GObject *object,
+                           guint prop_id,
+                           const GValue *value,
+                           GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_INFO_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_INFO:
+    {
+      auto info =
+        static_cast<arrow::flight::FlightInfo *>(g_value_get_pointer(value));
+      new(&(priv->info)) arrow::flight::FlightInfo(*info);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_info_init(GAFlightInfo *object)
+{
+}
+
+static void
+gaflight_info_class_init(GAFlightInfoClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_info_finalize;
+  gobject_class->set_property = gaflight_info_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("info",
+                              "Info",
+                              "The raw arrow::flight::FlightInfo *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_INFO, spec);
+}
+
+/**
+ * gaflight_info_new:
+ * @schema: A #GArrowSchema.
+ * @descriptor: A #GAFlightDescriptor.
+ * @endpoints: (element-type GAFlightEndpoint): A list of #GAFlightEndpoint.
+ * @total_records: The number of total records.
+ * @total_bytes: The number of total bytes.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): The newly created #GAFlightInfo, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightInfo *
+gaflight_info_new(GArrowSchema *schema,
+                  GAFlightDescriptor *descriptor,
+                  GList *endpoints,
+                  gint64 total_records,
+                  gint64 total_bytes,
+                  GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  std::vector<arrow::flight::FlightEndpoint> flight_endpoints;
+  for (auto node = endpoints; node; node = node->next) {
+    auto endpoint = GAFLIGHT_ENDPOINT(node->data);
+    flight_endpoints.push_back(*gaflight_endpoint_get_raw(endpoint));
+  }
+  auto flight_info_result =
+    arrow::flight::FlightInfo::Make(*arrow_schema,
+                                    *flight_descriptor,
+                                    flight_endpoints,
+                                    total_records,
+                                    total_bytes);
+  if (!garrow::check(error,
+                     flight_info_result,
+                     "[flight-info][new]")) {
+    return NULL;
+  }
+  return gaflight_info_new_raw(&(*flight_info_result));
+}
+
+/**
+ * gaflight_info_equal:
+ * @info: A #GAFlightInfo.
+ * @other_info: A #GAFlightInfo to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same information,
+ *   %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_info_equal(GAFlightInfo *info,
+                    GAFlightInfo *other_info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  const auto flight_other_info = gaflight_info_get_raw(other_info);
+  return
+    (flight_info->serialized_schema() ==
+     flight_other_info->serialized_schema()) &&
+    (flight_info->descriptor() ==
+     flight_other_info->descriptor()) &&
+    (flight_info->endpoints() ==
+     flight_other_info->endpoints()) &&
+    (flight_info->total_records() ==
+     flight_other_info->total_records()) &&
+    (flight_info->total_bytes() ==
+     flight_other_info->total_bytes());
+}
+
+/**
+ * gaflight_info_get_schema:
+ * @info: A #GAFlightInfo.
+ * @options: (nullable): A #GArrowReadOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): Deserialized #GArrowSchema, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowSchema *
+gaflight_info_get_schema(GAFlightInfo *info,
+                         GArrowReadOptions *options,
+                         GError **error)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  arrow::Status status;
+  std::shared_ptr<arrow::Schema> arrow_schema;
+  if (options) {
+    auto arrow_memo = garrow_read_options_get_dictionary_memo_raw(options);
+    status = flight_info->GetSchema(arrow_memo, &arrow_schema);
+  } else {
+    arrow::ipc::DictionaryMemo arrow_memo;
+    status = flight_info->GetSchema(&arrow_memo, &arrow_schema);
+  }
+  if (garrow::check(error, status, "[flight-info][get-schema]")) {
+    return garrow_schema_new_raw(&arrow_schema);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * gaflight_info_get_descriptor:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: (transfer full): The #GAFlightDescriptor of the information.
+ *
+ * Since: 5.0.0
+ */
+GAFlightDescriptor *
+gaflight_info_get_descriptor(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return gaflight_descriptor_new_raw(&(flight_info->descriptor()));
+}
+
+/**
+ * gaflight_info_get_endpoints:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: (element-type GAFlightEndpoint) (transfer full):
+ *   The list of #GAFlightEndpoint of the information.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_info_get_endpoints(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  GList *endpoints = NULL;
+  for (const auto &flight_endpoint : flight_info->endpoints()) {
+    auto endpoint = gaflight_endpoint_new_raw(&flight_endpoint, nullptr);
+    endpoints = g_list_prepend(endpoints, endpoint);
+  }
+  return g_list_reverse(endpoints);
+}
+
+/**
+ * gaflight_info_get_total_records:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: The number of total records of the information.
+ *
+ * Since: 5.0.0
+ */
+gint64
+gaflight_info_get_total_records(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return flight_info->total_records();
+}
+
+/**
+ * gaflight_info_get_total_bytes:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: The number of total bytes of the information.
+ *
+ * Since: 5.0.0
+ */
+gint64
+gaflight_info_get_total_bytes(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return flight_info->total_bytes();
+}
+
+typedef struct GAFlightStreamChunkPrivate_ {
+  arrow::flight::FlightStreamChunk chunk;
+} GAFlightStreamChunkPrivate;
+
+enum {
+  PROP_CHUNK = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightStreamChunk,
+                           gaflight_stream_chunk,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(obj)     \
+  static_cast<GAFlightStreamChunkPrivate *>(       \
+    gaflight_stream_chunk_get_instance_private(    \
+      GAFLIGHT_STREAM_CHUNK(obj)))
+
+static void
+gaflight_stream_chunk_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object);
+
+  priv->chunk.~FlightStreamChunk();
+
+  G_OBJECT_CLASS(gaflight_stream_chunk_parent_class)->finalize(object);
+}
+
+static void
+gaflight_stream_chunk_set_property(GObject *object,
+                                   guint prop_id,
+                                   const GValue *value,
+                                   GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CHUNK:
+    priv->chunk =
+      *static_cast<arrow::flight::FlightStreamChunk *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_stream_chunk_init(GAFlightStreamChunk *object)
+{
+}
+
+static void
+gaflight_stream_chunk_class_init(GAFlightStreamChunkClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_stream_chunk_finalize;
+  gobject_class->set_property = gaflight_stream_chunk_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("chunk",
+                              "Stream chunk",
+                              "The raw arrow::flight::FlightStreamChunk *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_CHUNK, spec);
+}
+
+/**
+ * gaflight_stream_chunk_get_data:
+ * @chunk: A #GAFlightStreamChunk.
+ *
+ * Returns: (transfer full): The data of the chunk.
+ *
+ * Since: 6.0.0
+ */
+GArrowRecordBatch *
+gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk)
+{
+  auto flight_chunk = gaflight_stream_chunk_get_raw(chunk);
+  return garrow_record_batch_new_raw(&(flight_chunk->data));
+}
+
+/**
+ * gaflight_stream_chunk_get_metadata:
+ * @chunk: A #GAFlightStreamChunk.
+ *
+ * Returns: (nullable) (transfer full): The metadata of the chunk.
+ *
+ * The metadata may be NULL.
+ *
+ * Since: 6.0.0
+ */
+GArrowBuffer *
+gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk)
+{
+  auto flight_chunk = gaflight_stream_chunk_get_raw(chunk);
+  if (flight_chunk->app_metadata) {
+    return garrow_buffer_new_raw(&(flight_chunk->app_metadata));
+  } else {
+    return NULL;
+  }
+}
+
+
+typedef struct GAFlightRecordBatchReaderPrivate_ {
+  arrow::flight::MetadataRecordBatchReader *reader;
+} GAFlightRecordBatchReaderPrivate;
+
+enum {
+  PROP_READER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader,
+                           gaflight_record_batch_reader,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj)     \
+  static_cast<GAFlightRecordBatchReaderPrivate *>(        \
+    gaflight_record_batch_reader_get_instance_private(    \
+      GAFLIGHT_RECORD_BATCH_READER(obj)))
+
+static void
+gaflight_record_batch_reader_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  delete priv->reader;
+
+  G_OBJECT_CLASS(gaflight_record_batch_reader_parent_class)->finalize(object);
+}
+
+static void
+gaflight_record_batch_reader_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_READER:
+    priv->reader =
+      static_cast<arrow::flight::MetadataRecordBatchReader *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_record_batch_reader_init(GAFlightRecordBatchReader *object)
+{
+}
+
+static void
+gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_record_batch_reader_finalize;
+  gobject_class->set_property = gaflight_record_batch_reader_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("reader",
+                              "Reader",
+                              "The raw arrow::flight::MetadataRecordBatchReader *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_READER, spec);
+}
+
+/**
+ * gaflight_record_batch_reader_read_next:
+ * @reader: A #GAFlightRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): The next chunk on success, %NULL on end
+ *   of stream, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GAFlightStreamChunk *
+gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader,
+                                       GError **error)
+{
+  auto flight_reader = gaflight_record_batch_reader_get_raw(reader);
+  arrow::flight::FlightStreamChunk flight_chunk;
+  auto status = flight_reader->Next(&flight_chunk);
+  if (garrow::check(error, status, "[flight-record-batch-reader][read-next]")) {
+    if (flight_chunk.data) {
+      return gaflight_stream_chunk_new_raw(&flight_chunk);
+    } else {
+      return NULL;
+    }
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * gaflight_record_batch_reader_read_all:
+ * @reader: A #GAFlightRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): The all data on success, %NULL on error.
+ * + * Since: 6.0.0 + */ +GArrowTable * +gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, + GError **error) +{ + auto flight_reader = gaflight_record_batch_reader_get_raw(reader); + std::shared_ptr arrow_table; + auto status = flight_reader->ReadAll(&arrow_table); + if (garrow::check(error, status, "[flight-record-batch-reader][read-all]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + + +G_END_DECLS + + +GAFlightCriteria * +gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria) +{ + auto criteria = g_object_new(GAFLIGHT_TYPE_CRITERIA, NULL); + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria); + priv->criteria = *flight_criteria; + priv->expression = g_bytes_new(priv->criteria.expression.data(), + priv->criteria.expression.size()); + return GAFLIGHT_CRITERIA(criteria); +} + +arrow::flight::Criteria * +gaflight_criteria_get_raw(GAFlightCriteria *criteria) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria); + return &(priv->criteria); +} + +arrow::flight::Location * +gaflight_location_get_raw(GAFlightLocation *location) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(location); + return &(priv->location); +} + +GAFlightDescriptor * +gaflight_descriptor_new_raw( + const arrow::flight::FlightDescriptor *flight_descriptor) +{ + GType gtype = GAFLIGHT_TYPE_DESCRIPTOR; + switch (flight_descriptor->type) { + case arrow::flight::FlightDescriptor::DescriptorType::PATH: + gtype = GAFLIGHT_TYPE_PATH_DESCRIPTOR; + break; + case arrow::flight::FlightDescriptor::DescriptorType::CMD: + gtype = GAFLIGHT_TYPE_COMMAND_DESCRIPTOR; + break; + default: + break; + } + return GAFLIGHT_DESCRIPTOR(g_object_new(gtype, + "descriptor", flight_descriptor, + NULL)); +} + +arrow::flight::FlightDescriptor * +gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(descriptor); + return &(priv->descriptor); +} + +GAFlightTicket * +gaflight_ticket_new_raw(const 
arrow::flight::Ticket *flight_ticket) +{ + auto ticket = g_object_new(GAFLIGHT_TYPE_TICKET, NULL); + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket); + priv->ticket = *flight_ticket; + priv->data = g_bytes_new(priv->ticket.ticket.data(), + priv->ticket.ticket.size()); + return GAFLIGHT_TICKET(ticket); +} + +arrow::flight::Ticket * +gaflight_ticket_get_raw(GAFlightTicket *ticket) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket); + return &(priv->ticket); +} + +GAFlightEndpoint * +gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, + GAFlightTicket *ticket) +{ + auto endpoint = GAFLIGHT_ENDPOINT(g_object_new(GAFLIGHT_TYPE_ENDPOINT, + NULL)); + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint); + if (ticket) { + priv->ticket = ticket; + g_object_ref(priv->ticket); + priv->endpoint.ticket = *gaflight_ticket_get_raw(priv->ticket); + } else { + auto data = g_bytes_new(flight_endpoint->ticket.ticket.data(), + flight_endpoint->ticket.ticket.length()); + auto ticket = gaflight_ticket_new(data); + g_bytes_unref(data); + priv->ticket = ticket; + priv->endpoint.ticket.ticket = flight_endpoint->ticket.ticket; + } + if (flight_endpoint) { + priv->endpoint.locations = flight_endpoint->locations; + } + return endpoint; +} + +arrow::flight::FlightEndpoint * +gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint); + return &(priv->endpoint); +} + +GAFlightInfo * +gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info) +{ + return GAFLIGHT_INFO(g_object_new(GAFLIGHT_TYPE_INFO, + "info", flight_info, + NULL)); +} + +arrow::flight::FlightInfo * +gaflight_info_get_raw(GAFlightInfo *info) +{ + auto priv = GAFLIGHT_INFO_GET_PRIVATE(info); + return &(priv->info); +} + +GAFlightStreamChunk * +gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk) +{ + return GAFLIGHT_STREAM_CHUNK( + g_object_new(GAFLIGHT_TYPE_STREAM_CHUNK, + "chunk", flight_chunk, + NULL)); +} + 
+arrow::flight::FlightStreamChunk * +gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk) +{ + auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(chunk); + return &(priv->chunk); +} + +arrow::flight::MetadataRecordBatchReader * +gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader) +{ + auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader); + return priv->reader; +} diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h new file mode 100644 index 00000000000..368fb665b47 --- /dev/null +++ b/c_glib/arrow-flight-glib/common.h @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria, + gaflight_criteria, + GAFLIGHT, + CRITERIA, + GObject) +struct _GAFlightCriteriaClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCriteria * +gaflight_criteria_new(GBytes *expression); + + +#define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightLocation, + gaflight_location, + GAFLIGHT, + LOCATION, + GObject) +struct _GAFlightLocationClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightLocation * +gaflight_location_new(const gchar *uri, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_location_to_string(GAFlightLocation *location); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_location_get_scheme(GAFlightLocation *location); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_location_equal(GAFlightLocation *location, + GAFlightLocation *other_location); + + +#define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightDescriptor, + gaflight_descriptor, + GAFLIGHT, + DESCRIPTOR, + GObject) +struct _GAFlightDescriptorClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_descriptor_to_string(GAFlightDescriptor *descriptor); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_descriptor_equal(GAFlightDescriptor *descriptor, + GAFlightDescriptor *other_descriptor); + + +#define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor, + gaflight_path_descriptor, + GAFLIGHT, + PATH_DESCRIPTOR, + GAFlightDescriptor) +struct _GAFlightPathDescriptorClass +{ + GAFlightDescriptorClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightPathDescriptor * +gaflight_path_descriptor_new(const gchar **paths, + gsize n_paths); + +GARROW_AVAILABLE_IN_5_0 +gchar ** 
+gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor); + + +#define GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor, + gaflight_command_descriptor, + GAFLIGHT, + COMMAND_DESCRIPTOR, + GAFlightDescriptor) +struct _GAFlightCommandDescriptorClass +{ + GAFlightDescriptorClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCommandDescriptor * +gaflight_command_descriptor_new(const gchar *command); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor); + + +#define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightTicket, + gaflight_ticket, + GAFLIGHT, + TICKET, + GObject) +struct _GAFlightTicketClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightTicket * +gaflight_ticket_new(GBytes *data); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_ticket_equal(GAFlightTicket *ticket, + GAFlightTicket *other_ticket); + + +#define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint, + gaflight_endpoint, + GAFLIGHT, + ENDPOINT, + GObject) +struct _GAFlightEndpointClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightEndpoint * +gaflight_endpoint_new(GAFlightTicket *ticket, + GList *locations); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_endpoint_equal(GAFlightEndpoint *endpoint, + GAFlightEndpoint *other_endpoint); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint); + + +#define GAFLIGHT_TYPE_INFO (gaflight_info_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightInfo, + gaflight_info, + GAFLIGHT, + INFO, + GObject) +struct _GAFlightInfoClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightInfo * +gaflight_info_new(GArrowSchema *schema, + GAFlightDescriptor *descriptor, + GList *endpoints, + gint64 total_records, + gint64 
total_bytes, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_info_equal(GAFlightInfo *info, + GAFlightInfo *other_info); + +GARROW_AVAILABLE_IN_5_0 +GArrowSchema * +gaflight_info_get_schema(GAFlightInfo *info, + GArrowReadOptions *options, + GError **error); +GARROW_AVAILABLE_IN_5_0 +GAFlightDescriptor * +gaflight_info_get_descriptor(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_info_get_endpoints(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +gint64 +gaflight_info_get_total_records(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +gint64 +gaflight_info_get_total_bytes(GAFlightInfo *info); + + +#define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamChunk, + gaflight_stream_chunk, + GAFLIGHT, + STREAM_CHUNK, + GObject) +struct _GAFlightStreamChunkClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatch * +gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk); +GARROW_AVAILABLE_IN_6_0 +GArrowBuffer * +gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk); + + +#define GAFLIGHT_TYPE_RECORD_BATCH_READER \ + (gaflight_record_batch_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader, + gaflight_record_batch_reader, + GAFLIGHT, + RECORD_BATCH_READER, + GObject) +struct _GAFlightRecordBatchReaderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GAFlightStreamChunk * +gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GArrowTable * +gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, + GError **error); + + +G_END_DECLS diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp new file mode 100644 index 00000000000..d23f7c8867f --- /dev/null +++ b/c_glib/arrow-flight-glib/common.hpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include + + +GAFlightCriteria * +gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria); +arrow::flight::Criteria * +gaflight_criteria_get_raw(GAFlightCriteria *criteria); + +arrow::flight::Location * +gaflight_location_get_raw(GAFlightLocation *location); + +GAFlightDescriptor * +gaflight_descriptor_new_raw( + const arrow::flight::FlightDescriptor *flight_descriptor); +arrow::flight::FlightDescriptor * +gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor); + +GAFlightTicket * +gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket); +arrow::flight::Ticket * +gaflight_ticket_get_raw(GAFlightTicket *ticket); + +GAFlightEndpoint * +gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, + GAFlightTicket *ticket); +arrow::flight::FlightEndpoint * +gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint); + +GAFlightInfo * +gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info); +arrow::flight::FlightInfo * +gaflight_info_get_raw(GAFlightInfo *info); + +GAFlightStreamChunk * +gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk); +arrow::flight::FlightStreamChunk * +gaflight_stream_chunk_get_raw(GAFlightStreamChunk 
*chunk); + +arrow::flight::MetadataRecordBatchReader * +gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build new file mode 100644 index 00000000000..c17415fee3d --- /dev/null +++ b/c_glib/arrow-flight-glib/meson.build @@ -0,0 +1,82 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +sources = files( + 'client.cpp', + 'common.cpp', + 'server.cpp', +) + +c_headers = files( + 'arrow-flight-glib.h', + 'client.h', + 'common.h', + 'server.h', +) + +cpp_headers = files( + 'arrow-flight-glib.hpp', + 'client.hpp', + 'common.hpp', + 'server.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: 'arrow-flight-glib') + +dependencies = [ + arrow_flight, + arrow_glib, +] +libarrow_flight_glib = library('arrow-flight-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(libarrow_flight_glib, + filebase: 'arrow-flight-glib', + name: 'Apache Arrow Flight GLib', + description: 'C API for Apache Arrow Flight based on GLib', + version: version, + requires: ['arrow-glib', 'arrow-flight']) + +if have_gi + gnome.generate_gir(libarrow_flight_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + sources: sources + c_headers, + namespace: 'ArrowFlight', + nsversion: api_version, + identifier_prefix: 'GAFlight', + symbol_prefix: 'gaflight', + export_packages: 'arrow-flight-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ]) +endif diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp new file mode 100644 index 00000000000..e283b6d2688 --- /dev/null +++ b/c_glib/arrow-flight-glib/server.cpp @@ -0,0 +1,724 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include + +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: server + * @section_id: server + * @title: Server related classes + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightDataStream is a class for producing a sequence of IPC + * payloads to be sent in `FlightData` protobuf messages. Generally, + * this is not used directly. Generally, #GAFlightRecordBatchStream is + * used instead. + * + * #GAFlightRecordBatchStream is a class for producing a sequence of + * IPC payloads to be sent in `FlightData` protobuf messages by + * #GArrowRecordBatchReader. + * + * #GAFlightServerOptions is a class for options of each server. + * + * #GAFlightServerCallContext is a class for context of each server call. + * + * #GAFlightServer is a class to develop an Apache Arrow Flight server.
+ * + * Since: 5.0.0 + */ + + +typedef struct GAFlightDataStreamPrivate_ { + arrow::flight::FlightDataStream *stream; +} GAFlightDataStreamPrivate; + +enum { + PROP_STREAM = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDataStream, + gaflight_data_stream, + G_TYPE_OBJECT) + +#define GAFLIGHT_DATA_STREAM_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_data_stream_get_instance_private( \ + GAFLIGHT_DATA_STREAM(obj))) + +static void +gaflight_data_stream_finalize(GObject *object) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object); + + delete priv->stream; + + G_OBJECT_CLASS(gaflight_data_stream_parent_class)->finalize(object); +} + +static void +gaflight_data_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STREAM: + priv->stream = static_cast( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_data_stream_init(GAFlightDataStream *object) +{ +} + +static void +gaflight_data_stream_class_init(GAFlightDataStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_data_stream_finalize; + gobject_class->set_property = gaflight_data_stream_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("stream", + "Stream", + "The raw arrow::flight::FlightDataStream *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_STREAM, spec); +} + + +typedef struct GAFlightRecordBatchStreamPrivate_ { + GArrowRecordBatchReader *reader; +} GAFlightRecordBatchStreamPrivate; + +enum { + PROP_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchStream, + gaflight_record_batch_stream, + GAFLIGHT_TYPE_DATA_STREAM) + +#define GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(obj) \ + static_cast( \ + 
gaflight_record_batch_stream_get_instance_private( \ + GAFLIGHT_RECORD_BATCH_STREAM(obj))) + +static void +gaflight_record_batch_stream_dispose(GObject *object) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = NULL; + } + + G_OBJECT_CLASS(gaflight_record_batch_stream_parent_class)->dispose(object); +} + +static void +gaflight_record_batch_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + priv->reader = GARROW_RECORD_BATCH_READER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_record_batch_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + g_value_set_object(value, priv->reader); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_record_batch_stream_init(GAFlightRecordBatchStream *object) +{ +} + +static void +gaflight_record_batch_stream_class_init(GAFlightRecordBatchStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_record_batch_stream_dispose; + gobject_class->set_property = gaflight_record_batch_stream_set_property; + gobject_class->get_property = gaflight_record_batch_stream_get_property; + + GParamSpec *spec; + /** + * GAFlightRecordBatchStream:reader: + * + * The reader that produces record batches. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("reader", + "Reader", + "The reader that produces record batches", + GARROW_TYPE_RECORD_BATCH_READER, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_READER, spec); +} + +/** + * gaflight_record_batch_stream_new: + * @reader: A #GArrowRecordBatchReader to be read. + * @options: (nullable): A #GArrowWriteOptions for writing record batches to + * a client. + * + * Returns: The newly created #GAFlightRecordBatchStream. + * + * Since: 6.0.0 + */ +GAFlightRecordBatchStream * +gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, + GArrowWriteOptions *options) +{ + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + auto arrow_options_default = arrow::ipc::IpcWriteOptions::Defaults(); + arrow::ipc::IpcWriteOptions *arrow_options = NULL; + if (options) { + arrow_options = garrow_write_options_get_raw(options); + } else { + arrow_options = &arrow_options_default; + } + auto stream = arrow::internal::make_unique< + arrow::flight::RecordBatchStream>(arrow_reader, *arrow_options); + return static_cast( + g_object_new(GAFLIGHT_TYPE_RECORD_BATCH_STREAM, + "stream", stream.release(), + "reader", reader, + NULL)); +} + + +typedef struct GAFlightServerOptionsPrivate_ { + arrow::flight::FlightServerOptions options; + GAFlightLocation *location; +} GAFlightServerOptionsPrivate; + +enum { + PROP_LOCATION = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerOptions, + gaflight_server_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_server_options_get_instance_private( \ + GAFLIGHT_SERVER_OPTIONS(obj))) + +static void +gaflight_server_options_dispose(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + if (priv->location) { + g_object_unref(priv->location); + priv->location = NULL; + } + + 
G_OBJECT_CLASS(gaflight_server_options_parent_class)->dispose(object); +} + +static void +gaflight_server_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + priv->options.~FlightServerOptions(); + + G_OBJECT_CLASS(gaflight_server_options_parent_class)->finalize(object); +} + +static void +gaflight_server_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCATION: + { + priv->location = GAFLIGHT_LOCATION(g_value_dup_object(value)); + auto flight_location = gaflight_location_get_raw(priv->location); + new(&(priv->options)) arrow::flight::FlightServerOptions(*flight_location); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_server_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCATION: + g_value_set_object(value, priv->location); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_server_options_init(GAFlightServerOptions *object) +{ +} + +static void +gaflight_server_options_class_init(GAFlightServerOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_server_options_dispose; + gobject_class->finalize = gaflight_server_options_finalize; + gobject_class->set_property = gaflight_server_options_set_property; + gobject_class->get_property = gaflight_server_options_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("location", + "Location", + "The location to be listened", + GAFLIGHT_TYPE_LOCATION, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_LOCATION, 
spec); +} + +/** + * gaflight_server_options_new: + * @location: A #GAFlightLocation to be listened. + * + * Returns: The newly created options for a server. + * + * Since: 5.0.0 + */ +GAFlightServerOptions * +gaflight_server_options_new(GAFlightLocation *location) +{ + return static_cast( + g_object_new(GAFLIGHT_TYPE_SERVER_OPTIONS, + "location", location, + NULL)); +} + + +typedef struct GAFlightServerCallContextPrivate_ { + arrow::flight::ServerCallContext *call_context; +} GAFlightServerCallContextPrivate; + +enum { + PROP_CALL_CONTEXT = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerCallContext, + gaflight_server_call_context, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_server_call_context_get_instance_private( \ + GAFLIGHT_SERVER_CALL_CONTEXT(obj))) + +static void +gaflight_server_call_context_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CALL_CONTEXT: + priv->call_context = + static_cast( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_server_call_context_init(GAFlightServerCallContext *object) +{ +} + +static void +gaflight_server_call_context_class_init(GAFlightServerCallContextClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = gaflight_server_call_context_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("call-context", + "Call context", + "The raw arrow::flight::ServerCallContext", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CALL_CONTEXT, spec); +} + + +G_END_DECLS +namespace gaflight { + class DataStream : public arrow::flight::FlightDataStream { + public: + DataStream(GAFlightDataStream *gastream) : + 
arrow::flight::FlightDataStream(), + gastream_(gastream) { + } + + ~DataStream() override { + g_object_unref(gastream_); + } + + std::shared_ptr schema() override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->schema(); + } + + arrow::Status GetSchemaPayload( + arrow::flight::FlightPayload *payload) override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->GetSchemaPayload(payload); + } + + arrow::Status Next(arrow::flight::FlightPayload *payload) override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->Next(payload); + } + + private: + GAFlightDataStream *gastream_; + }; + + class Server : public arrow::flight::FlightServerBase { + public: + Server(GAFlightServer *gaserver) : gaserver_(gaserver) { + } + + arrow::Status + ListFlights( + const arrow::flight::ServerCallContext &context, + const arrow::flight::Criteria *criteria, + std::unique_ptr *listing) override { + auto gacontext = gaflight_server_call_context_new_raw(&context); + GAFlightCriteria *gacriteria = NULL; + if (criteria) { + gacriteria = gaflight_criteria_new_raw(criteria); + } + GError *gerror = NULL; + auto gaflights = gaflight_server_list_flights(gaserver_, + gacontext, + gacriteria, + &gerror); + if (gacriteria) { + g_object_unref(gacriteria); + } + g_object_unref(gacontext); + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][list-flights]"); + } + std::vector flights; + for (auto node = gaflights; node; node = node->next) { + auto gaflight = GAFLIGHT_INFO(node->data); + flights.push_back(*gaflight_info_get_raw(gaflight)); + g_object_unref(gaflight); + } + g_list_free(gaflights); + *listing = arrow::internal::make_unique< + arrow::flight::SimpleFlightListing>(flights); + return arrow::Status::OK(); + } + + arrow::Status DoGet( + const arrow::flight::ServerCallContext &context, + const arrow::flight::Ticket &ticket, + std::unique_ptr *stream) override { + 
auto gacontext = gaflight_server_call_context_new_raw(&context); + auto gaticket = gaflight_ticket_new_raw(&ticket); + GError *gerror = NULL; + auto gastream = gaflight_server_do_get(gaserver_, + gacontext, + gaticket, + &gerror); + g_object_unref(gaticket); + g_object_unref(gacontext); + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][do-get]"); + } + *stream = arrow::internal::make_unique(gastream); + return arrow::Status::OK(); + } + + private: + GAFlightServer *gaserver_; + }; +}; +G_BEGIN_DECLS + +typedef struct GAFlightServerPrivate_ { + gaflight::Server server; +} GAFlightServerPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightServer, + gaflight_server, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_GET_PRIVATE(obj) \ + static_cast( \ + gaflight_server_get_instance_private( \ + GAFLIGHT_SERVER(obj))) + +static void +gaflight_server_finalize(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object); + + priv->server.~Server(); + + G_OBJECT_CLASS(gaflight_server_parent_class)->finalize(object); +} + +static void +gaflight_server_init(GAFlightServer *object) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object); + new(&(priv->server)) gaflight::Server(object); +} + +static void +gaflight_server_class_init(GAFlightServerClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_server_finalize; +} + +/** + * gaflight_server_listen: + * @server: A #GAFlightServer. + * @options: A #GAFlightServerOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 5.0.0 + */ +gboolean +gaflight_server_listen(GAFlightServer *server, + GAFlightServerOptions *options, + GError **error) +{ + auto flight_server = gaflight_server_get_raw(server); + const auto flight_options = gaflight_server_options_get_raw(options); + return garrow::check(error, + flight_server->Init(*flight_options), + "[flight-server][listen]"); +} + +/** + * gaflight_server_get_port: + * @server: A #GAFlightServer. + * + * Returns: The port number the server is listening on. + * + * Since: 5.0.0 + */ +gint +gaflight_server_get_port(GAFlightServer *server) +{ + const auto flight_server = gaflight_server_get_raw(server); + return flight_server->port(); +} + +/** + * gaflight_server_shutdown: + * @server: A #GAFlightServer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Shuts down the server. This function can be called from a signal + * handler or another thread. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 5.0.0 + */ +gboolean +gaflight_server_shutdown(GAFlightServer *server, + GError **error) +{ + auto flight_server = gaflight_server_get_raw(server); + return garrow::check(error, + flight_server->Shutdown(), + "[flight-server][shutdown]"); +} + +/** + * gaflight_server_list_flights: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @criteria: (nullable): A #GAFlightCriteria. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (element-type GAFlightInfo) (transfer full): + * #GList of #GAFlightInfo on success, %NULL on error.
+ * + * Since: 5.0.0 + */ +GList * +gaflight_server_list_flights(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + if (!(klass && klass->list_flights)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "not implemented"); + return NULL; + } + return (*(klass->list_flights))(server, context, criteria, error); +} + +/** + * gaflight_server_do_get: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @ticket: A #GAFlightTicket. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): #GAFlightDataStream on success, %NULL on error. + * + * Since: 6.0.0 + */ +GAFlightDataStream * +gaflight_server_do_get(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + if (!(klass && klass->do_get)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "not implemented"); + return NULL; + } + return (*(klass->do_get))(server, context, ticket, error); +} + + +G_END_DECLS + + +arrow::flight::FlightDataStream * +gaflight_data_stream_get_raw(GAFlightDataStream *stream) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(stream); + return priv->stream; +} + +arrow::flight::FlightServerOptions * +gaflight_server_options_get_raw(GAFlightServerOptions *options) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +GAFlightServerCallContext * +gaflight_server_call_context_new_raw( + const arrow::flight::ServerCallContext *call_context) +{ + return GAFLIGHT_SERVER_CALL_CONTEXT( + g_object_new(GAFLIGHT_TYPE_SERVER_CALL_CONTEXT, + "call-context", call_context, + NULL)); +} + +arrow::flight::FlightServerBase * +gaflight_server_get_raw(GAFlightServer *server) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(server); + return 
&(priv->server); +} diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h new file mode 100644 index 00000000000..107fe44bf77 --- /dev/null +++ b/c_glib/arrow-flight-glib/server.h @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_DATA_STREAM \ + (gaflight_data_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightDataStream, + gaflight_data_stream, + GAFLIGHT, + DATA_STREAM, + GObject) +struct _GAFlightDataStreamClass +{ + GObjectClass parent_class; +}; + + +#define GAFLIGHT_TYPE_RECORD_BATCH_STREAM \ + (gaflight_record_batch_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream, + gaflight_record_batch_stream, + GAFLIGHT, + RECORD_BATCH_STREAM, + GAFlightDataStream) +struct _GAFlightRecordBatchStreamClass +{ + GAFlightDataStreamClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GAFlightRecordBatchStream * +gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, + GArrowWriteOptions *options); + + +#define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServerOptions, + gaflight_server_options, + GAFLIGHT, + SERVER_OPTIONS, + GObject) +struct _GAFlightServerOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightServerOptions * +gaflight_server_options_new(GAFlightLocation *location); + + +#define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT \ + (gaflight_server_call_context_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, + gaflight_server_call_context, + GAFLIGHT, + SERVER_CALL_CONTEXT, + GObject) +struct _GAFlightServerCallContextClass +{ + GObjectClass parent_class; +}; + + +#define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServer, + gaflight_server, + GAFLIGHT, + SERVER, + GObject) +/** + * GAFlightServerClass: + * @list_flights: A virtual function to implement `ListFlights` API. + * @do_get: A virtual function to implement `DoGet` API. 
+ * + * Since: 5.0.0 + */ +struct _GAFlightServerClass +{ + GObjectClass parent_class; + + GList *(*list_flights)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error); + GAFlightDataStream *(*do_get)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error); +}; + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_listen(GAFlightServer *server, + GAFlightServerOptions *options, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gint +gaflight_server_get_port(GAFlightServer *server); +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_shutdown(GAFlightServer *server, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_wait(GAFlightServer *server, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_server_list_flights(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GAFlightDataStream * +gaflight_server_do_get(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error); + +G_END_DECLS diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp new file mode 100644 index 00000000000..f7f2a7aba1b --- /dev/null +++ b/c_glib/arrow-flight-glib/server.hpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include + + +arrow::flight::FlightDataStream * +gaflight_data_stream_get_raw(GAFlightDataStream *stream); + +arrow::flight::FlightServerOptions * +gaflight_server_options_get_raw(GAFlightServerOptions *options); + +GAFlightServerCallContext * +gaflight_server_call_context_new_raw( + const arrow::flight::ServerCallContext *flight_context); + +arrow::flight::FlightServerBase * +gaflight_server_get_raw(GAFlightServer *server); diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index c9ac8f5755c..c5ae035a7bb 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -6142,9 +6142,9 @@ garrow_array_builder_new_raw(arrow::ArrayBuilder *arrow_builder, break; case arrow::Type::type::DICTIONARY: { - const auto& dict_type = - arrow::internal::checked_cast(*arrow_builder->type()); - switch (dict_type.value_type()->id()) { + auto dict_type = + std::static_pointer_cast(arrow_builder->type()); + switch (dict_type->value_type()->id()) { case arrow::Type::type::BINARY: type = GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER; break; diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h index 74d9f9209ed..e25044ec9f0 100644 --- a/c_glib/arrow-glib/arrow-glib.h +++ b/c_glib/arrow-glib/arrow-glib.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp index 4382328f1bd..6dc6d43f2f9 100644 --- a/c_glib/arrow-glib/arrow-glib.hpp +++ 
b/c_glib/arrow-glib/arrow-glib.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index f2a924ee45c..1eb65b88964 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -167,6 +167,178 @@ G_BEGIN_DECLS * extension types. */ +typedef struct GArrowEqualOptionsPrivate_ { + gboolean approx; + arrow::EqualOptions options; +} GArrowEqualOptionsPrivate; + +enum { + PROP_APPROX = 1, + PROP_NANS_EQUAL, + PROP_ABSOLUTE_TOLERANCE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowEqualOptions, + garrow_equal_options, + G_TYPE_OBJECT) + +#define GARROW_EQUAL_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_equal_options_get_instance_private( \ + GARROW_EQUAL_OPTIONS(object))) + +static void +garrow_equal_options_finalize(GObject *object) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + priv->options.~EqualOptions(); + G_OBJECT_CLASS(garrow_equal_options_parent_class)->finalize(object); +} + +static void +garrow_equal_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_APPROX: + priv->approx = g_value_get_boolean(value); + break; + case PROP_NANS_EQUAL: + priv->options = priv->options.nans_equal(g_value_get_boolean(value)); + break; + case PROP_ABSOLUTE_TOLERANCE: + priv->options = priv->options.atol(g_value_get_double(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_equal_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_APPROX: + g_value_set_boolean(value, priv->approx); + break; + case PROP_NANS_EQUAL: + g_value_set_boolean(value, priv->options.nans_equal()); + 
break; + case PROP_ABSOLUTE_TOLERANCE: + g_value_set_double(value, priv->options.atol()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_equal_options_init(GArrowEqualOptions *object) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + priv->approx = FALSE; + new(&priv->options) arrow::EqualOptions; + priv->options = arrow::EqualOptions::Defaults(); +} + +static void +garrow_equal_options_class_init(GArrowEqualOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_equal_options_finalize; + gobject_class->set_property = garrow_equal_options_set_property; + gobject_class->get_property = garrow_equal_options_get_property; + + auto options = arrow::EqualOptions::Defaults(); + GParamSpec *spec; + /** + * GArrowEqualOptions:approx: + * + * Whether or not approximate comparison is used. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("approx", + "Approx", + "Whether or not approximate comparison is used", + FALSE, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_APPROX, spec); + + /** + * GArrowEqualOptions:nans-equal: + * + * Whether or not NaNs are considered equal. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("nans-equal", + "NaNs equal", + "Whether or not NaNs are considered equal", + options.nans_equal(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_NANS_EQUAL, spec); + + /** + * GArrowEqualOptions:absolute-tolerance: + * + * The absolute tolerance for approximate comparison of + * floating-point values. 
+ * + * Since: 5.0.0 + */ + spec = g_param_spec_double("absolute-tolerance", + "Absolute tolerance", + "The absolute tolerance for approximate comparison " + "of floating-point values", + -G_MAXDOUBLE, + G_MAXDOUBLE, + options.atol(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ABSOLUTE_TOLERANCE, spec); +} + +/** + * garrow_equal_options_new: + * + * Returns: A newly created #GArrowEqualOptions. + * + * Since: 5.0.0 + */ +GArrowEqualOptions * +garrow_equal_options_new(void) +{ + auto equal_options = g_object_new(GARROW_TYPE_EQUAL_OPTIONS, NULL); + return GARROW_EQUAL_OPTIONS(equal_options); +} + +/** + * garrow_equal_options_is_approx: + * @options: A #GArrowEqualOptions. + * + * Returns: %TRUE if approximate comparison is used, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_equal_options_is_approx(GArrowEqualOptions *options) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(options); + return priv->approx; +} + + typedef struct GArrowArrayPrivate_ { std::shared_ptr array; GArrowDataType *value_data_type; @@ -396,10 +568,39 @@ garrow_array_class_init(GArrowArrayClass *klass) */ gboolean garrow_array_equal(GArrowArray *array, GArrowArray *other_array) +{ + return garrow_array_equal_options(array, other_array, NULL); +} + +/** + * garrow_array_equal_options: + * @array: A #GArrowArray. + * @other_array: A #GArrowArray to be compared. + * @options: (nullable): A #GArrowEqualOptions to custom how to compare. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. 
+ * + * Since: 5.0.0 + */ +gboolean +garrow_array_equal_options(GArrowArray *array, + GArrowArray *other_array, + GArrowEqualOptions *options) { const auto arrow_array = garrow_array_get_raw(array); const auto arrow_other_array = garrow_array_get_raw(other_array); - return arrow_array->Equals(arrow_other_array); + if (options) { + auto is_approx = garrow_equal_options_is_approx(options); + const auto arrow_options = garrow_equal_options_get_raw(options); + if (is_approx) { + return arrow_array->ApproxEquals(arrow_other_array, *arrow_options); + } else { + return arrow_array->Equals(arrow_other_array, *arrow_options); + } + } else { + return arrow_array->Equals(arrow_other_array); + } } /** @@ -429,6 +630,7 @@ garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array) * @end_index: The end index of @array to be used. The end index of * @other_array is "@other_start_index + (@end_index - * @start_index)". + * @options: (nullable): A #GArrowEqualOptions to custom how to compare. * * Returns: %TRUE if both of them have the same data in the range, * %FALSE otherwise. 
@@ -440,14 +642,24 @@ garrow_array_equal_range(GArrowArray *array, gint64 start_index, GArrowArray *other_array, gint64 other_start_index, - gint64 end_index) + gint64 end_index, + GArrowEqualOptions *options) { const auto arrow_array = garrow_array_get_raw(array); const auto arrow_other_array = garrow_array_get_raw(other_array); - return arrow_array->RangeEquals(*arrow_other_array, - start_index, - end_index, - other_start_index); + if (options) { + const auto arrow_options = garrow_equal_options_get_raw(options); + return arrow_array->RangeEquals(arrow_other_array, + start_index, + end_index, + other_start_index, + *arrow_options); + } else { + return arrow_array->RangeEquals(arrow_other_array, + start_index, + end_index, + other_start_index); + } } /** @@ -2848,6 +3060,13 @@ garrow_extension_array_get_storage(GArrowExtensionArray *array) G_END_DECLS +arrow::EqualOptions * +garrow_equal_options_get_raw(GArrowEqualOptions *equal_options) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(equal_options); + return &(priv->options); +} + GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array) { diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index 9835db5e67a..b4b3de15217 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -24,6 +24,25 @@ G_BEGIN_DECLS +#define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowEqualOptions, + garrow_equal_options, + GARROW, + EQUAL_OPTIONS, + GObject) +struct _GArrowEqualOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowEqualOptions * +garrow_equal_options_new(void); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_equal_options_is_approx(GArrowEqualOptions *options); + + #define GARROW_TYPE_ARRAY (garrow_array_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowArray, garrow_array, @@ -37,13 +56,18 @@ struct _GArrowArrayClass gboolean garrow_array_equal (GArrowArray *array, GArrowArray *other_array); 
+GARROW_AVAILABLE_IN_5_0 +gboolean garrow_array_equal_options(GArrowArray *array, + GArrowArray *other_array, + GArrowEqualOptions *options); gboolean garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array); gboolean garrow_array_equal_range (GArrowArray *array, gint64 start_index, GArrowArray *other_array, gint64 other_start_index, - gint64 end_index); + gint64 end_index, + GArrowEqualOptions *options); gboolean garrow_array_is_null (GArrowArray *array, gint64 i); diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp index effebb01a6f..3ef1c196976 100644 --- a/c_glib/arrow-glib/basic-array.hpp +++ b/c_glib/arrow-glib/basic-array.hpp @@ -23,6 +23,9 @@ #include +arrow::EqualOptions * +garrow_equal_options_get_raw(GArrowEqualOptions *equal_options); + GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array); GArrowArray * diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 1f2082712da..d7e3ca85f38 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1925,6 +1925,9 @@ garrow_data_type_new_raw(std::shared_ptr *arrow_data_type) case arrow::Type::type::DICTIONARY: type = GARROW_TYPE_DICTIONARY_DATA_TYPE; break; + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_DATA_TYPE; + break; case arrow::Type::type::DECIMAL128: type = GARROW_TYPE_DECIMAL128_DATA_TYPE; break; diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 275e406be79..e845b1d80cc 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -52,27 +52,6 @@ garrow_numeric_array_sum(GArrowArrayType array, } } -template -GArrowBooleanArray * -garrow_numeric_array_compare(GArrowArrayType array, - VALUE value, - GArrowCompareOptions *options, - GError **error, - const gchar *tag) -{ - auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); - auto arrow_options = garrow_compare_options_get_raw(options); - auto 
arrow_compared_datum = arrow::compute::Compare(arrow_array, - arrow::Datum(value), - *arrow_options); - if (garrow::check(error, arrow_compared_datum, tag)) { - auto arrow_compared_array = (*arrow_compared_datum).make_array(); - return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_compared_array)); - } else { - return NULL; - } -} - template auto garrow_take(arrow::Datum arrow_values, @@ -130,8 +109,9 @@ G_BEGIN_DECLS * #GArrowCastOptions is a class to customize the `cast` function and * garrow_array_cast(). * - * #GArrowCountOptions is a class to customize the `count` function and - * garrow_array_count(). + * #GArrowScalarAggregateOptions is a class to customize the scalar + * aggregate functions such as `count` function and convenient + * functions of them such as garrow_array_count(). * * #GArrowFilterOptions is a class to customize the `filter` function and * garrow_array_filter() family. @@ -139,9 +119,6 @@ G_BEGIN_DECLS * #GArrowTakeOptions is a class to customize the `take` function and * garrow_array_take() family. * - * #GArrowCompareOptions is a class to customize the `equal` function - * family and garrow_int8_array_compare() family. - * * #GArrowArraySortOptions is a class to customize the * `array_sort_indices` function. * @@ -525,7 +502,7 @@ garrow_cast_options_class_init(GArrowCastOptionsClass *klass) /** * GArrowCastOptions:to-data-type: * - * The GArrowDataType being casted to. + * The #GArrowDataType being casted to. 
* * Since: 1.0.0 */ @@ -636,60 +613,65 @@ garrow_cast_options_new(void) } -typedef struct GArrowCountOptionsPrivate_ { - arrow::compute::CountOptions options; -} GArrowCountOptionsPrivate; +typedef struct GArrowScalarAggregateOptionsPrivate_ { + arrow::compute::ScalarAggregateOptions options; +} GArrowScalarAggregateOptionsPrivate; enum { - PROP_MODE = 1, + PROP_SKIP_NULLS = 1, + PROP_MIN_COUNT, }; static arrow::compute::FunctionOptions * -garrow_count_options_get_raw_function_options(GArrowFunctionOptions *options) +garrow_scalar_aggregate_options_get_raw_function_options( + GArrowFunctionOptions *options) { - return garrow_count_options_get_raw(GARROW_COUNT_OPTIONS(options)); + return garrow_scalar_aggregate_options_get_raw( + GARROW_SCALAR_AGGREGATE_OPTIONS(options)); } static void -garrow_count_options_function_options_interface_init( +garrow_scalar_aggregate_options_function_options_interface_init( GArrowFunctionOptionsInterface *iface) { - iface->get_raw = garrow_count_options_get_raw_function_options; + iface->get_raw = garrow_scalar_aggregate_options_get_raw_function_options; } -G_DEFINE_TYPE_WITH_CODE(GArrowCountOptions, - garrow_count_options, +G_DEFINE_TYPE_WITH_CODE(GArrowScalarAggregateOptions, + garrow_scalar_aggregate_options, G_TYPE_OBJECT, - G_ADD_PRIVATE(GArrowCountOptions) + G_ADD_PRIVATE(GArrowScalarAggregateOptions) G_IMPLEMENT_INTERFACE( GARROW_TYPE_FUNCTION_OPTIONS, - garrow_count_options_function_options_interface_init)) + garrow_scalar_aggregate_options_function_options_interface_init)) -#define GARROW_COUNT_OPTIONS_GET_PRIVATE(object) \ - static_cast( \ - garrow_count_options_get_instance_private( \ - GARROW_COUNT_OPTIONS(object))) +#define GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_scalar_aggregate_options_get_instance_private( \ + GARROW_SCALAR_AGGREGATE_OPTIONS(object))) static void -garrow_count_options_finalize(GObject *object) +garrow_scalar_aggregate_options_finalize(GObject *object) { - auto 
priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object); - priv->options.~CountOptions(); - G_OBJECT_CLASS(garrow_count_options_parent_class)->finalize(object); + auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object); + priv->options.~ScalarAggregateOptions(); + G_OBJECT_CLASS(garrow_scalar_aggregate_options_parent_class)->finalize(object); } static void -garrow_count_options_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) +garrow_scalar_aggregate_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) { - auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object); + auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object); switch (prop_id) { - case PROP_MODE: - priv->options.count_mode = - static_cast(g_value_get_enum(value)); + case PROP_SKIP_NULLS: + priv->options.skip_nulls = g_value_get_boolean(value); + break; + case PROP_MIN_COUNT: + priv->options.min_count = g_value_get_uint(value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -698,16 +680,19 @@ garrow_count_options_set_property(GObject *object, } static void -garrow_count_options_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) +garrow_scalar_aggregate_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) { - auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object); + auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object); switch (prop_id) { - case PROP_MODE: - g_value_set_enum(value, priv->options.count_mode); + case PROP_SKIP_NULLS: + g_value_set_boolean(value, priv->options.skip_nulls); + break; + case PROP_MIN_COUNT: + g_value_set_uint(value, priv->options.min_count); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -716,51 +701,69 @@ garrow_count_options_get_property(GObject *object, } static void -garrow_count_options_init(GArrowCountOptions *object) 
+garrow_scalar_aggregate_options_init(GArrowScalarAggregateOptions *object) { - auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object); - new(&priv->options) arrow::compute::CountOptions( - arrow::compute::CountOptions::COUNT_NON_NULL); + auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object); + new(&priv->options) arrow::compute::ScalarAggregateOptions(); } static void -garrow_count_options_class_init(GArrowCountOptionsClass *klass) +garrow_scalar_aggregate_options_class_init( + GArrowScalarAggregateOptionsClass *klass) { auto gobject_class = G_OBJECT_CLASS(klass); - gobject_class->finalize = garrow_count_options_finalize; - gobject_class->set_property = garrow_count_options_set_property; - gobject_class->get_property = garrow_count_options_get_property; + gobject_class->finalize = garrow_scalar_aggregate_options_finalize; + gobject_class->set_property = garrow_scalar_aggregate_options_set_property; + gobject_class->get_property = garrow_scalar_aggregate_options_get_property; + + auto options = arrow::compute::ScalarAggregateOptions::Defaults(); GParamSpec *spec; /** - * GArrowCountOptions:mode: + * GArrowScalarAggregateOptions:skip-nulls: * - * How to count values. + * Whether NULLs are skipped or not. * - * Since: 0.13.0 + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SKIP_NULLS, spec); + + /** + * GArrowScalarAggregateOptions:min-count: + * + * The minimum required number of values. 
+ * + * Since: 5.0.0 */ - spec = g_param_spec_enum("mode", - "Mode", - "How to count values", - GARROW_TYPE_COUNT_MODE, - GARROW_COUNT_ALL, + spec = g_param_spec_uint("min-count", + "Min count", + "The minimum required number of values", + 0, + G_MAXUINT, + options.min_count, static_cast(G_PARAM_READWRITE)); - g_object_class_install_property(gobject_class, PROP_MODE, spec); + g_object_class_install_property(gobject_class, PROP_MIN_COUNT, spec); } /** - * garrow_count_options_new: + * garrow_scalar_aggregate_options_new: * - * Returns: A newly created #GArrowCountOptions. + * Returns: A newly created #GArrowScalarAggregateOptions. * - * Since: 0.13.0 + * Since: 5.0.0 */ -GArrowCountOptions * -garrow_count_options_new(void) +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new(void) { - auto count_options = g_object_new(GARROW_TYPE_COUNT_OPTIONS, NULL); - return GARROW_COUNT_OPTIONS(count_options); + auto scalar_aggregate_options = + g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS, NULL); + return GARROW_SCALAR_AGGREGATE_OPTIONS(scalar_aggregate_options); } @@ -863,14 +866,14 @@ garrow_filter_options_class_init(GArrowFilterOptionsClass *klass) GParamSpec *spec; /** - * GArrowFilterOptions:null_selection_behavior: + * GArrowFilterOptions:null-selection-behavior: * * How to handle filtered values. 
* * Since: 0.17.0 */ - spec = g_param_spec_enum("null_selection_behavior", - "Null selection behavior", + spec = g_param_spec_enum("null-selection-behavior", + "NULL selection behavior", "How to handle filtered values", GARROW_TYPE_FILTER_NULL_SELECTION_BEHAVIOR, static_cast( @@ -962,133 +965,6 @@ garrow_take_options_new(void) } -typedef struct GArrowCompareOptionsPrivate_ { - arrow::compute::CompareOptions options; -} GArrowCompareOptionsPrivate; - -enum { - PROP_OPERATOR = 1, -}; - -static arrow::compute::FunctionOptions * -garrow_compare_options_get_raw_function_options(GArrowFunctionOptions *options) -{ - return garrow_compare_options_get_raw(GARROW_COMPARE_OPTIONS(options)); -} - -static void -garrow_compare_options_function_options_interface_init( - GArrowFunctionOptionsInterface *iface) -{ - iface->get_raw = garrow_compare_options_get_raw_function_options; -} - -G_DEFINE_TYPE_WITH_CODE(GArrowCompareOptions, - garrow_compare_options, - G_TYPE_OBJECT, - G_ADD_PRIVATE(GArrowCompareOptions) - G_IMPLEMENT_INTERFACE( - GARROW_TYPE_FUNCTION_OPTIONS, - garrow_compare_options_function_options_interface_init)) - -#define GARROW_COMPARE_OPTIONS_GET_PRIVATE(object) \ - static_cast( \ - garrow_compare_options_get_instance_private( \ - GARROW_COMPARE_OPTIONS(object))) - -static void -garrow_compare_options_finalize(GObject *object) -{ - auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object); - priv->options.~CompareOptions(); - G_OBJECT_CLASS(garrow_compare_options_parent_class)->finalize(object); -} - -static void -garrow_compare_options_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) -{ - auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_OPERATOR: - priv->options.op = - static_cast(g_value_get_enum(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_compare_options_get_property(GObject *object, - guint prop_id, 
- GValue *value, - GParamSpec *pspec) -{ - auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object); - - switch (prop_id) { - case PROP_OPERATOR: - g_value_set_enum(value, priv->options.op); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - -static void -garrow_compare_options_init(GArrowCompareOptions *object) -{ - auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object); - new(&priv->options) arrow::compute::CompareOptions(arrow::compute::EQUAL); -} - -static void -garrow_compare_options_class_init(GArrowCompareOptionsClass *klass) -{ - auto gobject_class = G_OBJECT_CLASS(klass); - - gobject_class->finalize = garrow_compare_options_finalize; - gobject_class->set_property = garrow_compare_options_set_property; - gobject_class->get_property = garrow_compare_options_get_property; - - GParamSpec *spec; - /** - * GArrowCompareOptions:operator: - * - * How to compare the value. - * - * Since: 0.14.0 - */ - spec = g_param_spec_enum("operator", - "Operator", - "How to compare the value", - GARROW_TYPE_COMPARE_OPERATOR, - 0, - static_cast(G_PARAM_READWRITE)); - g_object_class_install_property(gobject_class, PROP_OPERATOR, spec); -} - -/** - * garrow_compare_options_new: - * - * Returns: A newly created #GArrowCompareOptions. - * - * Since: 0.14.0 - */ -GArrowCompareOptions * -garrow_compare_options_new(void) -{ - auto compare_options = g_object_new(GARROW_TYPE_COMPARE_OPTIONS, NULL); - return GARROW_COMPARE_OPTIONS(compare_options); -} - - typedef struct GArrowArraySortOptionsPrivate_ { arrow::compute::ArraySortOptions options; } GArrowArraySortOptionsPrivate; @@ -1682,7 +1558,7 @@ garrow_array_dictionary_encode(GArrowArray *array, /** * garrow_array_count: * @array: A #GArrowArray. - * @options: (nullable): A #GArrowCountOptions. + * @options: (nullable): A #GArrowScalarAggregateOptions. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: The number of target values on success. 
If an error is occurred, @@ -1692,14 +1568,14 @@ garrow_array_dictionary_encode(GArrowArray *array, */ gint64 garrow_array_count(GArrowArray *array, - GArrowCountOptions *options, + GArrowScalarAggregateOptions *options, GError **error) { auto arrow_array = garrow_array_get_raw(array); auto arrow_array_raw = arrow_array.get(); arrow::Result arrow_counted_datum; if (options) { - auto arrow_options = garrow_count_options_get_raw(options); + auto arrow_options = garrow_scalar_aggregate_options_get_raw(options); arrow_counted_datum = arrow::compute::Count(*arrow_array_raw, *arrow_options); } else { @@ -2305,267 +2181,6 @@ garrow_record_batch_take(GArrowRecordBatch *record_batch, "[record-batch][take]"); } - -/** - * garrow_int8_array_compare: - * @array: A #GArrowInt8Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_int8_array_compare(GArrowInt8Array *array, - gint8 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[int8-array][compare]"); -} - -/** - * garrow_uint8_array_compare: - * @array: A #GArrowUInt8Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. 
- * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_uint8_array_compare(GArrowUInt8Array *array, - guint8 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[uint8-array][compare]"); -} - -/** - * garrow_int16_array_compare: - * @array: A #GArrowInt16Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_int16_array_compare(GArrowInt16Array *array, - gint16 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[int16-array][compare]"); -} - -/** - * garrow_uint16_array_compare: - * @array: A #GArrowUInt16Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_uint16_array_compare(GArrowUInt16Array *array, - guint16 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[uint16-array][compare]"); -} - -/** - * garrow_int32_array_compare: - * @array: A #GArrowUInt32Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. 
- * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_int32_array_compare(GArrowInt32Array *array, - gint32 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[int32-array][compare]"); -} - -/** - * garrow_uint32_array_compare: - * @array: A #GArrowUInt32Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_uint32_array_compare(GArrowUInt32Array *array, - guint32 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[uint32-array][compare]"); -} - -/** - * garrow_int64_array_compare: - * @array: A #GArrowInt64Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_int64_array_compare(GArrowInt64Array *array, - gint64 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[int64-array][compare]"); -} - -/** - * garrow_uint64_array_compare: - * @array: A #GArrowUInt64Array. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. 
- * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_uint64_array_compare(GArrowUInt64Array *array, - guint64 value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[uint64-array][compare]"); -} - -/** - * garrow_float_array_compare: - * @array: A #GArrowFloatArray. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_float_array_compare(GArrowFloatArray *array, - gfloat value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[float-array][compare]"); -} - -/** - * garrow_double_array_compare: - * @array: A #GArrowDoubleArray. - * @value: The value to compare. - * @options: A #GArrowCompareOptions. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (nullable) (transfer full): The #GArrowBooleanArray as - * the result compared a numeric array with a scalar on success, - * %NULL on error. - * - * Since: 0.14.0 - */ -GArrowBooleanArray * -garrow_double_array_compare(GArrowDoubleArray *array, - gdouble value, - GArrowCompareOptions *options, - GError **error) -{ - return garrow_numeric_array_compare(array, - value, - options, - error, - "[double-array][compare]"); -} - /** * garrow_array_filter: * @array: A #GArrowArray. 
@@ -3059,20 +2674,23 @@ garrow_cast_options_get_raw(GArrowCastOptions *cast_options) return &(priv->options); } -GArrowCountOptions * -garrow_count_options_new_raw(arrow::compute::CountOptions *arrow_count_options) +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new_raw( + arrow::compute::ScalarAggregateOptions *arrow_scalar_aggregate_options) { - auto count_options = - g_object_new(GARROW_TYPE_COUNT_OPTIONS, - "mode", arrow_count_options->count_mode, + auto scalar_aggregate_options = + g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS, + "skip-nulls", arrow_scalar_aggregate_options->skip_nulls, + "min-count", arrow_scalar_aggregate_options->min_count, NULL); - return GARROW_COUNT_OPTIONS(count_options); + return GARROW_SCALAR_AGGREGATE_OPTIONS(scalar_aggregate_options); } -arrow::compute::CountOptions * -garrow_count_options_get_raw(GArrowCountOptions *count_options) +arrow::compute::ScalarAggregateOptions * +garrow_scalar_aggregate_options_get_raw( + GArrowScalarAggregateOptions *scalar_aggregate_options) { - auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(count_options); + auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(scalar_aggregate_options); return &(priv->options); } @@ -3090,13 +2708,6 @@ garrow_take_options_get_raw(GArrowTakeOptions *take_options) return &(priv->options); } -arrow::compute::CompareOptions * -garrow_compare_options_get_raw(GArrowCompareOptions *compare_options) -{ - auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(compare_options); - return &(priv->options); -} - arrow::compute::ArraySortOptions * garrow_array_sort_options_get_raw(GArrowArraySortOptions *array_sort_options) { diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 63ba6e0eae5..1163983644c 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -83,32 +83,20 @@ struct _GArrowCastOptionsClass GArrowCastOptions *garrow_cast_options_new(void); -/** - * GArrowCountMode: - * @GARROW_COUNT_ALL: Count all non-null 
values. - * @GARROW_COUNT_NULL: Count all null values. - * - * They are corresponding to `arrow::compute::CountOptions::Mode` values. - */ -typedef enum { - GARROW_COUNT_ALL, - GARROW_COUNT_NULL, -} GArrowCountMode; - -#define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowCountOptions, - garrow_count_options, +#define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS (garrow_scalar_aggregate_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions, + garrow_scalar_aggregate_options, GARROW, - COUNT_OPTIONS, + SCALAR_AGGREGATE_OPTIONS, GObject) -struct _GArrowCountOptionsClass +struct _GArrowScalarAggregateOptionsClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_0_13 -GArrowCountOptions * -garrow_count_options_new(void); +GARROW_AVAILABLE_IN_5_0 +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new(void); /** @@ -158,42 +146,6 @@ GArrowTakeOptions * garrow_take_options_new(void); -/** - * GArrowCompareOperator: - * @GARROW_COMPARE_EQUAL: Equal operator. - * @GARROW_COMPARE_NOT_EQUAL: Not equal operator. - * @GARROW_COMPARE_GREATER: Greater operator. - * @GARROW_COMPARE_GREATER_EQUAL: Greater equal operator. - * @GARROW_COMPARE_LESS: Less operator. - * @GARROW_COMPARE_LESS_EQUAL: Less equal operator. - * - * They are corresponding to `arrow::compute::CompareOperator` values. 
- */ -typedef enum { - GARROW_COMPARE_EQUAL, - GARROW_COMPARE_NOT_EQUAL, - GARROW_COMPARE_GREATER, - GARROW_COMPARE_GREATER_EQUAL, - GARROW_COMPARE_LESS, - GARROW_COMPARE_LESS_EQUAL -} GArrowCompareOperator; - -#define GARROW_TYPE_COMPARE_OPTIONS (garrow_compare_options_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowCompareOptions, - garrow_compare_options, - GARROW, - COMPARE_OPTIONS, - GObject) -struct _GArrowCompareOptionsClass -{ - GObjectClass parent_class; -}; - -GARROW_AVAILABLE_IN_0_14 -GArrowCompareOptions * -garrow_compare_options_new(void); - - /** * GArrowSortOrder: * @GARROW_SORT_ORDER_ASCENDING: Sort in ascending order. @@ -290,7 +242,7 @@ GArrowDictionaryArray *garrow_array_dictionary_encode(GArrowArray *array, GError **error); GARROW_AVAILABLE_IN_0_13 gint64 garrow_array_count(GArrowArray *array, - GArrowCountOptions *options, + GArrowScalarAggregateOptions *options, GError **error); GARROW_AVAILABLE_IN_0_13 GArrowStructArray *garrow_array_count_values(GArrowArray *array, @@ -387,66 +339,6 @@ garrow_record_batch_take(GArrowRecordBatch *record_batch, GArrowArray *indices, GArrowTakeOptions *options, GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_int8_array_compare(GArrowInt8Array *array, - gint8 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_uint8_array_compare(GArrowUInt8Array *array, - guint8 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_int16_array_compare(GArrowInt16Array *array, - gint16 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_uint16_array_compare(GArrowUInt16Array *array, - guint16 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_int32_array_compare(GArrowInt32Array *array, - gint32 value, - GArrowCompareOptions *options, - GError **error); 
-GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_uint32_array_compare(GArrowUInt32Array *array, - guint32 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_int64_array_compare(GArrowInt64Array *array, - gint64 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_uint64_array_compare(GArrowUInt64Array *array, - guint64 value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_float_array_compare(GArrowFloatArray *array, - gfloat value, - GArrowCompareOptions *options, - GError **error); -GARROW_AVAILABLE_IN_0_14 -GArrowBooleanArray * -garrow_double_array_compare(GArrowDoubleArray *array, - gdouble value, - GArrowCompareOptions *options, - GError **error); GARROW_AVAILABLE_IN_0_15 GArrowArray * garrow_array_filter(GArrowArray *array, diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 1bc6fefdd40..8089a1d3364 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -46,10 +46,12 @@ garrow_function_get_raw(GArrowFunction *function); GArrowCastOptions *garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_cast_options); arrow::compute::CastOptions *garrow_cast_options_get_raw(GArrowCastOptions *cast_options); -GArrowCountOptions * -garrow_count_options_new_raw(arrow::compute::CountOptions *arrow_count_options); -arrow::compute::CountOptions * -garrow_count_options_get_raw(GArrowCountOptions *count_options); +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new_raw( + arrow::compute::ScalarAggregateOptions *arrow_scalar_aggregate_options); +arrow::compute::ScalarAggregateOptions * +garrow_scalar_aggregate_options_get_raw( + GArrowScalarAggregateOptions *scalar_aggregate_options); arrow::compute::FilterOptions * garrow_filter_options_get_raw(GArrowFilterOptions *filter_options); @@ -57,9 +59,6 @@ 
garrow_filter_options_get_raw(GArrowFilterOptions *filter_options); arrow::compute::TakeOptions * garrow_take_options_get_raw(GArrowTakeOptions *take_options); -arrow::compute::CompareOptions * -garrow_compare_options_get_raw(GArrowCompareOptions *compare_options); - arrow::compute::ArraySortOptions * garrow_array_sort_options_get_raw(GArrowArraySortOptions *array_sort_options); diff --git a/c_glib/arrow-glib/datum.cpp b/c_glib/arrow-glib/datum.cpp index 781dc086e46..66993d6c229 100644 --- a/c_glib/arrow-glib/datum.cpp +++ b/c_glib/arrow-glib/datum.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include G_BEGIN_DECLS @@ -143,6 +144,37 @@ garrow_datum_is_array_like(GArrowDatum *datum) return arrow_datum.is_arraylike(); } +/** + * garrow_datum_is_scalar: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowScalar, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_datum_is_scalar(GArrowDatum *datum) +{ + const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_scalar(); +} + +/** + * garrow_datum_is_value: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowArray, #GArrowChunkedArray or + * #GArrowScalar, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_datum_is_value(GArrowDatum *datum) +{ + const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_value(); +} + /** * garrow_datum_equal: * @datum: A #GArrowDatum. 
@@ -286,6 +318,109 @@ garrow_array_datum_new(GArrowArray *value) } +typedef struct GArrowScalarDatumPrivate_ { + GArrowScalar *value; +} GArrowScalarDatumPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowScalarDatum, + garrow_scalar_datum, + GARROW_TYPE_DATUM) + +#define GARROW_SCALAR_DATUM_GET_PRIVATE(obj) \ + static_cast( \ + garrow_scalar_datum_get_instance_private( \ + GARROW_SCALAR_DATUM(obj))) + +static void +garrow_scalar_datum_dispose(GObject *object) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_scalar_datum_parent_class)->dispose(object); +} + +static void +garrow_scalar_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_SCALAR(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_datum_init(GArrowScalarDatum *object) +{ +} + +static void +garrow_scalar_datum_class_init(GArrowScalarDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_scalar_datum_dispose; + gobject_class->set_property = garrow_scalar_datum_set_property; + gobject_class->get_property = garrow_scalar_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The scalar held by this datum", + GARROW_TYPE_SCALAR, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + 
g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_scalar_datum_new: + * @value: A #GArrowScalar. + * + * Returns: A newly created #GArrowScalarDatum. + * + * Since: 5.0.0 + */ +GArrowScalarDatum * +garrow_scalar_datum_new(GArrowScalar *value) +{ + auto arrow_value = garrow_scalar_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_scalar_datum_new_raw(&arrow_datum, value); +} + + typedef struct GArrowChunkedArrayDatumPrivate_ { GArrowChunkedArray *value; } GArrowChunkedArrayDatumPrivate; @@ -608,6 +743,12 @@ GArrowDatum * garrow_datum_new_raw(arrow::Datum *arrow_datum) { switch (arrow_datum->kind()) { + case arrow::Datum::SCALAR: + { + auto arrow_scalar = arrow_datum->scalar(); + auto scalar = garrow_scalar_new_raw(&arrow_scalar); + return GARROW_DATUM(garrow_scalar_datum_new_raw(arrow_datum, scalar)); + } case arrow::Datum::ARRAY: { auto arrow_array = arrow_datum->make_array(); @@ -642,6 +783,16 @@ garrow_datum_new_raw(arrow::Datum *arrow_datum) } } +GArrowScalarDatum * +garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum, + GArrowScalar *value) +{ + return GARROW_SCALAR_DATUM(g_object_new(GARROW_TYPE_SCALAR_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} + GArrowArrayDatum * garrow_array_datum_new_raw(arrow::Datum *arrow_datum, GArrowArray *value) diff --git a/c_glib/arrow-glib/datum.h b/c_glib/arrow-glib/datum.h index 9b1544f3271..bc7dda36911 100644 --- a/c_glib/arrow-glib/datum.h +++ b/c_glib/arrow-glib/datum.h @@ -22,6 +22,7 @@ #include #include #include +#include #include G_BEGIN_DECLS @@ -41,10 +42,12 @@ GARROW_AVAILABLE_IN_1_0 gboolean garrow_datum_is_array(GArrowDatum *datum); GARROW_AVAILABLE_IN_1_0 gboolean garrow_datum_is_array_like(GArrowDatum *datum); -/* -GARROW_AVAILABLE_IN_1_0 +GARROW_AVAILABLE_IN_5_0 gboolean garrow_datum_is_scalar(GArrowDatum *datum); -GARROW_AVAILABLE_IN_1_0 +GARROW_AVAILABLE_IN_5_0 +gboolean garrow_datum_is_value(GArrowDatum *datum); +/* 
+GARROW_AVAILABLE_IN_5_0 gboolean garrow_datum_is_collection(GArrowDatum *datum); */ GARROW_AVAILABLE_IN_1_0 @@ -54,9 +57,20 @@ GARROW_AVAILABLE_IN_1_0 gchar *garrow_datum_to_string(GArrowDatum *datum); /* GARROW_TYPE_NONE_DATUM */ -/* GARROW_TYPE_SCALAR_DATUM */ -/* GARROW_TYPE_INT8_SCALAR_DATUM */ -/* ... */ + +#define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalarDatum, + garrow_scalar_datum, + GARROW, + SCALAR_DATUM, + GArrowDatum) +struct _GArrowScalarDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowScalarDatum *garrow_scalar_datum_new(GArrowScalar *value); #define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowArrayDatum, diff --git a/c_glib/arrow-glib/datum.hpp b/c_glib/arrow-glib/datum.hpp index 673501f89ed..d1acfc58c93 100644 --- a/c_glib/arrow-glib/datum.hpp +++ b/c_glib/arrow-glib/datum.hpp @@ -28,6 +28,9 @@ garrow_datum_get_raw(GArrowDatum *datum); GArrowDatum * garrow_datum_new_raw(arrow::Datum *arrow_datum); +GArrowScalarDatum * +garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum, + GArrowScalar *value); GArrowArrayDatum * garrow_array_datum_new_raw(arrow::Datum *arrow_datum, GArrowArray *value); diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal.cpp index cf0a08a3d7c..497d76fcfaa 100644 --- a/c_glib/arrow-glib/decimal.cpp +++ b/c_glib/arrow-glib/decimal.cpp @@ -177,7 +177,7 @@ garrow_decimal_to_bytes(typename DecimalConverter::GArrowType *decimal) { DecimalConverter converter; const auto arrow_decimal = converter.get_raw(decimal); - uint8_t data[DecimalConverter::ArrowType::bit_width / 8]; + uint8_t data[DecimalConverter::ArrowType::kBitWidth / 8]; arrow_decimal->ToBytes(data); return g_bytes_new(data, sizeof(data)); } diff --git a/c_glib/arrow-glib/error.cpp b/c_glib/arrow-glib/error.cpp index 9502d114e88..ac61ddc499a 100644 --- a/c_glib/arrow-glib/error.cpp +++ b/c_glib/arrow-glib/error.cpp @@ -135,8 
+135,8 @@ garrow_error_to_status(GError *error, message << context << ": " << g_quark_to_string(error->domain); message << "(" << error->code << "): "; message << error->message; - g_error_free(error); auto code = garrow_error_to_status_code(error, default_code); + g_error_free(error); return arrow::Status(code, message.str()); } diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index dbfea52a847..d0479634d6d 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -31,6 +31,7 @@ sources = files( 'error.cpp', 'field.cpp', 'record-batch.cpp', + 'scalar.cpp', 'schema.cpp', 'table.cpp', 'table-builder.cpp', @@ -88,6 +89,7 @@ c_headers = files( 'field.h', 'gobject-type.h', 'record-batch.h', + 'scalar.h', 'schema.h', 'table.h', 'table-builder.h', @@ -144,6 +146,7 @@ cpp_headers = files( 'error.hpp', 'field.hpp', 'record-batch.hpp', + 'scalar.hpp', 'schema.hpp', 'table.hpp', 'table-builder.hpp', diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp index 762d0c30faf..ca580e8dcf3 100644 --- a/c_glib/arrow-glib/reader.cpp +++ b/c_glib/arrow-glib/reader.cpp @@ -144,6 +144,42 @@ garrow_record_batch_reader_class_init(GArrowRecordBatchReaderClass *klass) g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER, spec); } +/** + * garrow_record_batch_reader_new: + * @record_batches: (element-type GArrowRecordBatch): + * A list of #GArrowRecordBatch. + * @schema: (nullable): A #GArrowSchema to confirm to. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The schema in the stream on success, %NULL on error. 
+ * + * Since: 6.0.0 + */ +GArrowRecordBatchReader * +garrow_record_batch_reader_new(GList *record_batches, + GArrowSchema *schema, + GError **error) +{ + std::vector> arrow_record_batches; + for (auto node = record_batches; node; node = node->next) { + auto record_batch = GARROW_RECORD_BATCH(node->data); + arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch)); + } + std::shared_ptr arrow_schema; + if (schema) { + arrow_schema = garrow_schema_get_raw(schema); + } + auto arrow_reader_result = + arrow::RecordBatchReader::Make(arrow_record_batches, arrow_schema); + if (garrow::check(error, + arrow_reader_result, + "[record-batch-stream-reader][new]")) { + return garrow_record_batch_reader_new_raw(&*arrow_reader_result); + } else { + return NULL; + } +} + /** * garrow_record_batch_reader_get_schema: * @reader: A #GArrowRecordBatchReader. @@ -231,6 +267,33 @@ garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader, } } +/** + * garrow_record_batch_reader_read_all: + * @reader: A #GArrowRecordBatchReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * The all record batches in the stream as #GArrowTable. 
+ * + * Since: 6.0.0 + */ +GArrowTable * +garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, + GError **error) +{ + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + std::shared_ptr arrow_table; + auto status = arrow_reader->ReadAll(&arrow_table); + + if (garrow::check(error, + status, + "[record-batch-reader][read-all]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + G_DEFINE_TYPE(GArrowTableBatchReader, garrow_table_batch_reader, @@ -2077,13 +2140,13 @@ garrow_json_reader_read(GArrowJSONReader *reader, G_END_DECLS GArrowRecordBatchReader * -garrow_record_batch_reader_new_raw(std::shared_ptr *arrow_reader) +garrow_record_batch_reader_new_raw( + std::shared_ptr *arrow_reader) { - auto reader = - GARROW_RECORD_BATCH_READER(g_object_new(GARROW_TYPE_RECORD_BATCH_READER, - "record-batch-reader", arrow_reader, - NULL)); - return reader; + return GARROW_RECORD_BATCH_READER( + g_object_new(GARROW_TYPE_RECORD_BATCH_READER, + "record-batch-reader", arrow_reader, + NULL)); } std::shared_ptr diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h index 2628a7292ee..563b0cf227a 100644 --- a/c_glib/arrow-glib/reader.h +++ b/c_glib/arrow-glib/reader.h @@ -41,6 +41,12 @@ struct _GArrowRecordBatchReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatchReader * +garrow_record_batch_reader_new(GList *record_batches, + GArrowSchema *schema, + GError **error); + GArrowSchema *garrow_record_batch_reader_get_schema( GArrowRecordBatchReader *reader); #ifndef GARROW_DISABLE_DEPRECATED @@ -58,7 +64,10 @@ GArrowRecordBatch *garrow_record_batch_reader_read_next_record_batch( GArrowRecordBatch *garrow_record_batch_reader_read_next( GArrowRecordBatchReader *reader, GError **error); - +GARROW_AVAILABLE_IN_6_0 +GArrowTable * +garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, + GError **error); #define GARROW_TYPE_TABLE_BATCH_READER 
(garrow_table_batch_reader_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader, diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp new file mode 100644 index 00000000000..847b48620bd --- /dev/null +++ b/c_glib/arrow-glib/scalar.cpp @@ -0,0 +1,2405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: scalar + * @section_id: scalar-classes + * @title: Scalar classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowScalar is a base class for all scalar classes such as + * #GArrowBooleanScalar. + * + * #GArrowNullScalar is a class for a null scalar. + * + * #GArrowBooleanScalar is a class for a boolean scalar. + * + * #GArrowInt8Scalar is a class for a 8-bit integer scalar. + * + * #GArrowInt16Scalar is a class for a 16-bit integer scalar. + * + * #GArrowInt32Scalar is a class for a 32-bit integer scalar. + * + * #GArrowInt64Scalar is a class for a 64-bit integer scalar. + * + * #GArrowUInt8Scalar is a class for a 8-bit unsigned integer scalar. + * + * #GArrowUInt16Scalar is a class for a 16-bit unsigned integer scalar. 
+ * + * #GArrowUInt32Scalar is a class for a 32-bit unsigned integer scalar. + * + * #GArrowUInt64Scalar is a class for a 64-bit unsigned integer scalar. + * + * #GArrowFloatScalar is a class for a 32-bit floating point scalar. + * + * #GArrowDoubleScalar is a class for a 64-bit floating point scalar. + * + * #GArrowBaseBinaryScalar is a base class for all binary and string + * scalar classes such as #GArrowBinaryScalar. + * + * #GArrowBinaryScalar is a class for a binary scalar. + * + * #GArrowStringScalar is a class for an UTF-8 encoded string scalar. + * + * #GArrowLargeBinaryScalar is a class for a 64-bit offsets binary + * scalar. + * + * #GArrowLargeStringScalar is a class for a 64-bit offsets UTF-8 + * encoded string scalar. + * + * #GArrowFixedSizeBinaryScalar is a class for a fixed-size binary + * scalar. + * + * #GArrowDate32Scalar is a class for the number of days since UNIX + * epoch in a 32-bit signed integer scalar. + * + * #GArrowDate64Scalar is a class for the number of milliseconds + * since UNIX epoch in a 64-bit signed integer scalar. + * + * #GArrowTime32Scalar is a class for the number of seconds or + * milliseconds since midnight in a 32-bit signed integer scalar. + * + * #GArrowTime64Scalar is a class for the number of microseconds or + * nanoseconds since midnight in a 64-bit signed integer scalar. + * + * #GArrowTimestampScalar is a class for the number of + * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in + * a 64-bit signed integer scalar. + * + * #GArrowDecimal128Scalar is a class for a 128-bit decimal scalar. + * + * #GArrowDecimal256Scalar is a class for a 256-bit decimal scalar. + * + * #GArrowBaseListScalar is a base class for all list scalar classes + * such as #GArrowListScalar. + * + * #GArrowListScalar is a class for a list scalar. + * + * #GArrowLargeListScalar is a class for a large list scalar. + * + * #GArrowMapScalar is a class for a map list scalar. 
+ * + * #GArrowStructScalar is a class for a struct list scalar. + * + * #GArrowUnionScalar is a base class for all union scalar classes + * such as #GArrowSparseUnionScalar. + * + * #GArrowSparseUnionScalar is a class for a sparse union scalar. + * + * #GArrowDenseUnionScalar is a class for a dense union scalar. + * + * #GArrowExtensionScalar is a base class for user-defined extension + * scalar. + */ + +typedef struct GArrowScalarPrivate_ { + std::shared_ptr scalar; + GArrowDataType *data_type; +} GArrowScalarPrivate; + +enum { + PROP_SCALAR = 1, + PROP_DATA_TYPE, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowScalar, + garrow_scalar, + G_TYPE_OBJECT) + +#define GARROW_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_scalar_get_instance_private( \ + GARROW_SCALAR(obj))) + +static void +garrow_scalar_dispose(GObject *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + if (priv->data_type) { + g_object_unref(priv->data_type); + priv->data_type = NULL; + } + + G_OBJECT_CLASS(garrow_scalar_parent_class)->dispose(object); +} + +static void +garrow_scalar_finalize(GObject *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + priv->scalar.~shared_ptr(); + + G_OBJECT_CLASS(garrow_scalar_parent_class)->finalize(object); +} + +static void +garrow_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCALAR: + priv->scalar = + *static_cast *>(g_value_get_pointer(value)); + break; + case PROP_DATA_TYPE: + priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_init(GArrowScalar *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + new(&priv->scalar) std::shared_ptr; +} + +static void +garrow_scalar_class_init(GArrowScalarClass *klass) +{ + auto gobject_class = 
G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_scalar_dispose; + gobject_class->finalize = garrow_scalar_finalize; + gobject_class->set_property = garrow_scalar_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("scalar", + "Scalar", + "The raw std::shared *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCALAR, spec); + + /** + * GArrowScalar:data-type: + * + * The data type of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("data-type", + "Data type", + "The data type of the scalar", + GARROW_TYPE_DATA_TYPE, + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); +} + +/** + * garrow_scalar_parse: + * @data_type: A #GArrowDataType for the parsed scalar. + * @data: (array length=size): Data to be parsed. + * @size: The number of bytes of the data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created #GArrowScalar if the data is parsed successfully, + * %NULL otherwise. + * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_scalar_parse(GArrowDataType *data_type, + const guint8 *data, + gsize size, + GError **error) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_data = + arrow::util::string_view(reinterpret_cast(data), + size); + auto arrow_scalar_result = arrow::Scalar::Parse(arrow_data_type, arrow_data); + if (garrow::check(error, arrow_scalar_result, "[scalar][parse]")) { + auto arrow_scalar = *arrow_scalar_result; + return garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL); + } else { + return NULL; + } +} + +/** + * garrow_scalar_get_data_type: + * @scalar: A #GArrowScalar. + * + * Returns: (transfer none): The #GArrowDataType for the scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDataType * +garrow_scalar_get_data_type(GArrowScalar *scalar) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(scalar); + if (!priv->data_type) { + priv->data_type = garrow_data_type_new_raw(&(priv->scalar->type)); + } + return priv->data_type; +} + +/** + * garrow_scalar_is_valid: + * @scalar: A #GArrowScalar. + * + * Returns: %TRUE if the scalar is valid, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_is_valid(GArrowScalar *scalar) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + return arrow_scalar->is_valid; +} + +/** + * garrow_scalar_equal: + * @scalar: A #GArrowScalar. + * @other_scalar: A #GArrowScalar to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_equal(GArrowScalar *scalar, + GArrowScalar *other_scalar) +{ + return garrow_scalar_equal_options(scalar, other_scalar, NULL); +} + +/** + * garrow_scalar_equal_options: + * @scalar: A #GArrowScalar. + * @other_scalar: A #GArrowScalar to be compared. + * @options: (nullable): A #GArrowEqualOptions. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_equal_options(GArrowScalar *scalar, + GArrowScalar *other_scalar, + GArrowEqualOptions *options) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + const auto arrow_other_scalar = garrow_scalar_get_raw(other_scalar); + if (options) { + auto is_approx = garrow_equal_options_is_approx(options); + const auto arrow_options = garrow_equal_options_get_raw(options); + if (is_approx) { + return arrow_scalar->ApproxEquals(*arrow_other_scalar, *arrow_options); + } else { + return arrow_scalar->Equals(arrow_other_scalar, *arrow_options); + } + } else { + return arrow_scalar->Equals(arrow_other_scalar); + } +} + +/** + * garrow_scalar_to_string: + * @scalar: A #GArrowScalar. 
+ * + * Returns: The string representation of the scalar. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 5.0.0 + */ +gchar * +garrow_scalar_to_string(GArrowScalar *scalar) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + return g_strdup(arrow_scalar->ToString().c_str()); +} + +/** + * garrow_scalar_cast: + * @scalar: A #GArrowScalar. + * @data_type: A #GArrowDataType of the casted scalar. + * @options: (nullable): A #GArrowCastOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created casted scalar on success, %NULL on error. + * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_scalar_cast(GArrowScalar *scalar, + GArrowDataType *data_type, + GArrowCastOptions *options, + GError **error) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_casted_scalar_result = arrow_scalar->CastTo(arrow_data_type); + if (garrow::check(error, arrow_casted_scalar_result, "[scalar][cast]")) { + auto arrow_casted_scalar = *arrow_casted_scalar_result; + return garrow_scalar_new_raw(&arrow_casted_scalar, + "scalar", &arrow_casted_scalar, + "data-type", data_type, + NULL); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowNullScalar, + garrow_null_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_null_scalar_init(GArrowNullScalar *object) +{ +} + +static void +garrow_null_scalar_class_init(GArrowNullScalarClass *klass) +{ +} + +/** + * garrow_null_scalar_new: + * + * Returns: A newly created #GArrowNullScalar. 
+ * + * Since: 5.0.0 + */ +GArrowNullScalar * +garrow_null_scalar_new(void) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared()); + return GARROW_NULL_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + + +G_DEFINE_TYPE(GArrowBooleanScalar, + garrow_boolean_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_boolean_scalar_init(GArrowBooleanScalar *object) +{ +} + +static void +garrow_boolean_scalar_class_init(GArrowBooleanScalarClass *klass) +{ +} + +/** + * garrow_boolean_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowBooleanScalar. + * + * Since: 5.0.0 + */ +GArrowBooleanScalar * +garrow_boolean_scalar_new(gboolean value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_BOOLEAN_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_boolean_scalar_get_value: + * @scalar: A #GArrowBooleanScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gboolean +garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt8Scalar, + garrow_int8_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int8_scalar_init(GArrowInt8Scalar *object) +{ +} + +static void +garrow_int8_scalar_class_init(GArrowInt8ScalarClass *klass) +{ +} + +/** + * garrow_int8_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt8Scalar. + * + * Since: 5.0.0 + */ +GArrowInt8Scalar * +garrow_int8_scalar_new(gint8 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_INT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int8_scalar_get_value: + * @scalar: A #GArrowInt8Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint8 +garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt16Scalar, + garrow_int16_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int16_scalar_init(GArrowInt16Scalar *object) +{ +} + +static void +garrow_int16_scalar_class_init(GArrowInt16ScalarClass *klass) +{ +} + +/** + * garrow_int16_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt16Scalar. + * + * Since: 5.0.0 + */ +GArrowInt16Scalar * +garrow_int16_scalar_new(gint16 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_INT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int16_scalar_get_value: + * @scalar: A #GArrowInt16Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint16 +garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt32Scalar, + garrow_int32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int32_scalar_init(GArrowInt32Scalar *object) +{ +} + +static void +garrow_int32_scalar_class_init(GArrowInt32ScalarClass *klass) +{ +} + +/** + * garrow_int32_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt32Scalar. + * + * Since: 5.0.0 + */ +GArrowInt32Scalar * +garrow_int32_scalar_new(gint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_INT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int32_scalar_get_value: + * @scalar: A #GArrowInt32Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint32 +garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt64Scalar, + garrow_int64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int64_scalar_init(GArrowInt64Scalar *object) +{ +} + +static void +garrow_int64_scalar_class_init(GArrowInt64ScalarClass *klass) +{ +} + +/** + * garrow_int64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt64Scalar. + * + * Since: 5.0.0 + */ +GArrowInt64Scalar * +garrow_int64_scalar_new(gint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_INT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int64_scalar_get_value: + * @scalar: A #GArrowInt64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint64 +garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt8Scalar, + garrow_uint8_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint8_scalar_init(GArrowUInt8Scalar *object) +{ +} + +static void +garrow_uint8_scalar_class_init(GArrowUInt8ScalarClass *klass) +{ +} + +/** + * garrow_uint8_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt8Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt8Scalar * +garrow_uint8_scalar_new(guint8 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_UINT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint8_scalar_get_value: + * @scalar: A #GArrowUInt8Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +guint8 +garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt16Scalar, + garrow_uint16_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint16_scalar_init(GArrowUInt16Scalar *object) +{ +} + +static void +garrow_uint16_scalar_class_init(GArrowUInt16ScalarClass *klass) +{ +} + +/** + * garrow_uint16_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt16Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt16Scalar * +garrow_uint16_scalar_new(guint16 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_UINT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint16_scalar_get_value: + * @scalar: A #GArrowUInt16Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint16 +garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt32Scalar, + garrow_uint32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint32_scalar_init(GArrowUInt32Scalar *object) +{ +} + +static void +garrow_uint32_scalar_class_init(GArrowUInt32ScalarClass *klass) +{ +} + +/** + * garrow_uint32_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt32Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt32Scalar * +garrow_uint32_scalar_new(guint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_UINT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint32_scalar_get_value: + * @scalar: A #GArrowUInt32Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +guint32 +garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt64Scalar, + garrow_uint64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint64_scalar_init(GArrowUInt64Scalar *object) +{ +} + +static void +garrow_uint64_scalar_class_init(GArrowUInt64ScalarClass *klass) +{ +} + +/** + * garrow_uint64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt64Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt64Scalar * +garrow_uint64_scalar_new(guint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_UINT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint64_scalar_get_value: + * @scalar: A #GArrowUInt64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint64 +garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowFloatScalar, + garrow_float_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_float_scalar_init(GArrowFloatScalar *object) +{ +} + +static void +garrow_float_scalar_class_init(GArrowFloatScalarClass *klass) +{ +} + +/** + * garrow_float_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowFloatScalar. + * + * Since: 5.0.0 + */ +GArrowFloatScalar * +garrow_float_scalar_new(gfloat value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_FLOAT_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_float_scalar_get_value: + * @scalar: A #GArrowFloatScalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gfloat +garrow_float_scalar_get_value(GArrowFloatScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowDoubleScalar, + garrow_double_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_double_scalar_init(GArrowDoubleScalar *object) +{ +} + +static void +garrow_double_scalar_class_init(GArrowDoubleScalarClass *klass) +{ +} + +/** + * garrow_double_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDoubleScalar. + * + * Since: 5.0.0 + */ +GArrowDoubleScalar * +garrow_double_scalar_new(gdouble value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_DOUBLE_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_double_scalar_get_value: + * @scalar: A #GArrowDoubleScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gdouble +garrow_double_scalar_get_value(GArrowDoubleScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +typedef struct GArrowBaseBinaryScalarPrivate_ { + GArrowBuffer *value; +} GArrowBaseBinaryScalarPrivate; + +enum { + PROP_VALUE = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseBinaryScalar, + garrow_base_binary_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_base_binary_scalar_get_instance_private( \ + GARROW_BASE_BINARY_SCALAR(obj))) + +static void +garrow_base_binary_scalar_dispose(GObject *object) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_base_binary_scalar_parent_class)->dispose(object); +} + +static void +garrow_base_binary_scalar_set_property(GObject *object, + guint 
prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_base_binary_scalar_init(GArrowBaseBinaryScalar *object) +{ +} + +static void +garrow_base_binary_scalar_class_init(GArrowBaseBinaryScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_base_binary_scalar_dispose; + gobject_class->set_property = garrow_base_binary_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowBaseBinaryScalar:value: + * + * The value of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_BUFFER, + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS +template +GArrowScalar * +garrow_base_binary_scalar_new(GArrowBuffer *value) +{ + auto arrow_value = garrow_buffer_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_value)); + return garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "value", value, + NULL); +} +G_BEGIN_DECLS + +/** + * garrow_base_binary_scalar_get_value: + * @scalar: A #GArrowBaseBinaryScalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowBuffer * +garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + priv->value = garrow_buffer_new_raw(&(arrow_scalar->value)); + } + return priv->value; +} + + +G_DEFINE_TYPE(GArrowBinaryScalar, + garrow_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_binary_scalar_init(GArrowBinaryScalar *object) +{ +} + +static void +garrow_binary_scalar_class_init(GArrowBinaryScalarClass *klass) +{ +} + +/** + * garrow_binary_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowBinaryScalar. + * + * Since: 5.0.0 + */ +GArrowBinaryScalar * +garrow_binary_scalar_new(GArrowBuffer *value) +{ + return GARROW_BINARY_SCALAR( + garrow_base_binary_scalar_new(value)); +} + + +G_DEFINE_TYPE(GArrowStringScalar, + garrow_string_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_string_scalar_init(GArrowStringScalar *object) +{ +} + +static void +garrow_string_scalar_class_init(GArrowStringScalarClass *klass) +{ +} + +/** + * garrow_string_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowStringScalar. + * + * Since: 5.0.0 + */ +GArrowStringScalar * +garrow_string_scalar_new(GArrowBuffer *value) +{ + return GARROW_STRING_SCALAR( + garrow_base_binary_scalar_new(value)); +} + + +G_DEFINE_TYPE(GArrowLargeBinaryScalar, + garrow_large_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_large_binary_scalar_init(GArrowLargeBinaryScalar *object) +{ +} + +static void +garrow_large_binary_scalar_class_init(GArrowLargeBinaryScalarClass *klass) +{ +} + +/** + * garrow_large_binary_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeBinaryScalar. 
+ * + * Since: 5.0.0 + */ +GArrowLargeBinaryScalar * +garrow_large_binary_scalar_new(GArrowBuffer *value) +{ + return GARROW_LARGE_BINARY_SCALAR( + garrow_base_binary_scalar_new(value)); +} + + +G_DEFINE_TYPE(GArrowLargeStringScalar, + garrow_large_string_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_large_string_scalar_init(GArrowLargeStringScalar *object) +{ +} + +static void +garrow_large_string_scalar_class_init(GArrowLargeStringScalarClass *klass) +{ +} + +/** + * garrow_large_string_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeStringScalar. + * + * Since: 5.0.0 + */ +GArrowLargeStringScalar * +garrow_large_string_scalar_new(GArrowBuffer *value) +{ + return GARROW_LARGE_STRING_SCALAR( + garrow_base_binary_scalar_new(value)); +} + + +G_DEFINE_TYPE(GArrowFixedSizeBinaryScalar, + garrow_fixed_size_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_fixed_size_binary_scalar_init(GArrowFixedSizeBinaryScalar *object) +{ +} + +static void +garrow_fixed_size_binary_scalar_class_init( + GArrowFixedSizeBinaryScalarClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_scalar_new: + * @data_type: A #GArrowFixedSizeBinaryDataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowFixedSizeBinaryScalar. 
+ * + * Since: 5.0.0 + */ +GArrowFixedSizeBinaryScalar * +garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, + GArrowBuffer *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_buffer_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared( + arrow_value, arrow_data_type)); + return GARROW_FIXED_SIZE_BINARY_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + + +G_DEFINE_TYPE(GArrowDate32Scalar, + garrow_date32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_date32_scalar_init(GArrowDate32Scalar *object) +{ +} + +static void +garrow_date32_scalar_class_init(GArrowDate32ScalarClass *klass) +{ +} + +/** + * garrow_date32_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDate32Scalar. + * + * Since: 5.0.0 + */ +GArrowDate32Scalar * +garrow_date32_scalar_new(gint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_DATE32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_date32_scalar_get_value: + * @scalar: A #GArrowDate32Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint32 +garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowDate64Scalar, + garrow_date64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_date64_scalar_init(GArrowDate64Scalar *object) +{ +} + +static void +garrow_date64_scalar_class_init(GArrowDate64ScalarClass *klass) +{ +} + +/** + * garrow_date64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDate64Scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDate64Scalar * +garrow_date64_scalar_new(gint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value)); + return GARROW_DATE64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_date64_scalar_get_value: + * @scalar: A #GArrowDate64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint64 +garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTime32Scalar, + garrow_time32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_time32_scalar_init(GArrowTime32Scalar *object) +{ +} + +static void +garrow_time32_scalar_class_init(GArrowTime32ScalarClass *klass) +{ +} + +/** + * garrow_time32_scalar_new: + * @data_type: A #GArrowTime32DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTime32Scalar. + * + * Since: 5.0.0 + */ +GArrowTime32Scalar * +garrow_time32_scalar_new(GArrowTime32DataType *data_type, + gint32 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value, arrow_data_type)); + return GARROW_TIME32_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_time32_scalar_get_value: + * @scalar: A #GArrowTime32Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint32 +garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTime64Scalar, + garrow_time64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_time64_scalar_init(GArrowTime64Scalar *object) +{ +} + +static void +garrow_time64_scalar_class_init(GArrowTime64ScalarClass *klass) +{ +} + +/** + * garrow_time64_scalar_new: + * @data_type: A #GArrowTime64DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTime64Scalar. + * + * Since: 5.0.0 + */ +GArrowTime64Scalar * +garrow_time64_scalar_new(GArrowTime64DataType *data_type, + gint64 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value, arrow_data_type)); + return GARROW_TIME64_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_time64_scalar_get_value: + * @scalar: A #GArrowTime64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint64 +garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTimestampScalar, + garrow_timestamp_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_timestamp_scalar_init(GArrowTimestampScalar *object) +{ +} + +static void +garrow_timestamp_scalar_class_init(GArrowTimestampScalarClass *klass) +{ +} + +/** + * garrow_timestamp_scalar_new: + * @data_type: A #GArrowTimestampDataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTimestampScalar. 
+ * + * Since: 5.0.0 + */ +GArrowTimestampScalar * +garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type, + gint64 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(value, arrow_data_type)); + return GARROW_TIMESTAMP_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_timestamp_scalar_get_value: + * @scalar: A #GArrowTimestampScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint64 +garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +typedef struct GArrowDecimal128ScalarPrivate_ { + GArrowDecimal128 *value; +} GArrowDecimal128ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal128Scalar, + garrow_decimal128_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL128_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal128_scalar_get_instance_private( \ + GARROW_DECIMAL128_SCALAR(obj))) + +static void +garrow_decimal128_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal128_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal128_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL128(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal128_scalar_init(GArrowDecimal128Scalar *object) +{ +} + +static void 
+garrow_decimal128_scalar_class_init(GArrowDecimal128ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal128_scalar_dispose; + gobject_class->set_property = garrow_decimal128_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal128Scalar:value: + * + * The value of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + garrow_decimal128_get_type(), + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal128_scalar_new: + * @data_type: A #GArrowDecimal128DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal128Scalar. + * + * Since: 5.0.0 + */ +GArrowDecimal128Scalar * +garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type, + GArrowDecimal128 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal128_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL128_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + +/** + * garrow_decimal128_scalar_get_value: + * @scalar: A #GArrowDecimal128Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDecimal128 * +garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared(arrow_scalar->value); + priv->value = garrow_decimal128_new_raw(&arrow_value); + } + return priv->value; +} + + +typedef struct GArrowDecimal256ScalarPrivate_ { + GArrowDecimal256 *value; +} GArrowDecimal256ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal256Scalar, + garrow_decimal256_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL256_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_decimal256_scalar_get_instance_private( \ + GARROW_DECIMAL256_SCALAR(obj))) + +static void +garrow_decimal256_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal256_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal256_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL256(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal256_scalar_init(GArrowDecimal256Scalar *object) +{ +} + +static void +garrow_decimal256_scalar_class_init(GArrowDecimal256ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal256_scalar_dispose; + gobject_class->set_property = garrow_decimal256_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal256Scalar:value: + * + * The value of the scalar. 
+ * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + garrow_decimal256_get_type(), + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal256_scalar_new: + * @data_type: A #GArrowDecimal256DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal256Scalar. + * + * Since: 5.0.0 + */ +GArrowDecimal256Scalar * +garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type, + GArrowDecimal256 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal256_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL256_SCALAR(garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + +/** + * garrow_decimal256_scalar_get_value: + * @scalar: A #GArrowDecimal256Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared(arrow_scalar->value); + priv->value = garrow_decimal256_new_raw(&arrow_value); + } + return priv->value; +} + + +typedef struct GArrowBaseListScalarPrivate_ { + GArrowArray *value; +} GArrowBaseListScalarPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseListScalar, + garrow_base_list_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_BASE_LIST_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_base_list_scalar_get_instance_private( \ + GARROW_BASE_LIST_SCALAR(obj))) + +static void +garrow_base_list_scalar_dispose(GObject *object) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_base_list_scalar_parent_class)->dispose(object); +} + +static void +garrow_base_list_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_base_list_scalar_init(GArrowBaseListScalar *object) +{ +} + +static void +garrow_base_list_scalar_class_init(GArrowBaseListScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_base_list_scalar_dispose; + gobject_class->set_property = garrow_base_list_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowBaseListScalar:value: + * + * The value of the scalar. 
+ * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_ARRAY, + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS +template +GArrowScalar * +garrow_base_list_scalar_new(GArrowArray *value) +{ + auto arrow_value = garrow_array_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_value)); + auto data_type = garrow_array_get_value_data_type(value); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + g_object_unref(data_type); + return scalar; +} +G_BEGIN_DECLS + +/** + * garrow_base_list_scalar_get_value: + * @scalar: A #GArrowBaseListScalar. + * + * Returns: (transfer none): The value of this scalar. + * + * Since: 5.0.0 + */ +GArrowArray * +garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + const auto arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + priv->value = garrow_array_new_raw(&(arrow_scalar->value)); + } + return priv->value; +} + + +G_DEFINE_TYPE(GArrowListScalar, + garrow_list_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_list_scalar_init(GArrowListScalar *object) +{ +} + +static void +garrow_list_scalar_class_init(GArrowListScalarClass *klass) +{ +} + +/** + * garrow_list_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowListScalar. 
+ * + * Since: 5.0.0 + */ +GArrowListScalar * +garrow_list_scalar_new(GArrowListArray *value) +{ + return GARROW_LIST_SCALAR( + garrow_base_list_scalar_new(GARROW_ARRAY(value))); +} + + +G_DEFINE_TYPE(GArrowLargeListScalar, + garrow_large_list_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_large_list_scalar_init(GArrowLargeListScalar *object) +{ +} + +static void +garrow_large_list_scalar_class_init(GArrowLargeListScalarClass *klass) +{ +} + +/** + * garrow_large_list_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeListScalar. + * + * Since: 5.0.0 + */ +GArrowLargeListScalar * +garrow_large_list_scalar_new(GArrowLargeListArray *value) +{ + return GARROW_LARGE_LIST_SCALAR( + garrow_base_list_scalar_new(GARROW_ARRAY(value))); +} + + +G_DEFINE_TYPE(GArrowMapScalar, + garrow_map_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_map_scalar_init(GArrowMapScalar *object) +{ +} + +static void +garrow_map_scalar_class_init(GArrowMapScalarClass *klass) +{ +} + +/** + * garrow_map_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowMapScalar. 
 + * + * Since: 5.0.0 + */ +GArrowMapScalar * +garrow_map_scalar_new(GArrowStructArray *value) +{ + return GARROW_MAP_SCALAR( + garrow_base_list_scalar_new(GARROW_ARRAY(value))); +} + + +typedef struct GArrowStructScalarPrivate_ { + GList *value; +} GArrowStructScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowStructScalar, + garrow_struct_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_STRUCT_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_struct_scalar_get_instance_private( \ + GARROW_STRUCT_SCALAR(obj))) + +static void +garrow_struct_scalar_dispose(GObject *object) +{ + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_list_free_full(priv->value, g_object_unref); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_struct_scalar_parent_class)->dispose(object); +} + +static void +garrow_struct_scalar_init(GArrowStructScalar *object) +{ +} + +static void +garrow_struct_scalar_class_init(GArrowStructScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_struct_scalar_dispose; +} + +/** + * garrow_struct_scalar_new: + * @data_type: A #GArrowStructDataType for this scalar. + * @value: (element-type GArrowScalar): The value of this scalar. + * + * Returns: A newly created #GArrowStructScalar. 
+ * + * Since: 5.0.0 + */ +GArrowStructScalar * +garrow_struct_scalar_new(GArrowStructDataType *data_type, + GList *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + std::vector> arrow_value; + for (GList *node = value; node; node = node->next) { + auto field = GARROW_SCALAR(node->data); + auto arrow_field = garrow_scalar_get_raw(field); + arrow_value.push_back(arrow_field); + } + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_value, arrow_data_type)); + auto scalar = + GARROW_STRUCT_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar); + priv->value = g_list_copy_deep(value, + reinterpret_cast(g_object_ref), + NULL); + return scalar; +} + +/** + * garrow_struct_scalar_get_value: + * @scalar: A #GArrowStructScalar. + * + * Returns: (element-type GArrowScalar) (transfer none): + * The value of this scalar. + * + * Since: 5.0.0 + */ +GList * +garrow_struct_scalar_get_value(GArrowStructScalar *scalar) +{ + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar); + return priv->value; +} + + +typedef struct GArrowUnionScalarPrivate_ { + GArrowScalar *value; +} GArrowUnionScalarPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowUnionScalar, + garrow_union_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_UNION_SCALAR_GET_PRIVATE(obj) \ + static_cast( \ + garrow_union_scalar_get_instance_private( \ + GARROW_UNION_SCALAR(obj))) + +static void +garrow_union_scalar_dispose(GObject *object) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_union_scalar_parent_class)->dispose(object); +} + +static void +garrow_union_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object); + + switch (prop_id) 
{ + case PROP_VALUE: + priv->value = GARROW_SCALAR(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_union_scalar_init(GArrowUnionScalar *object) +{ +} + +static void +garrow_union_scalar_class_init(GArrowUnionScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_union_scalar_dispose; + gobject_class->set_property = garrow_union_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowUnionScalar:value: + * + * The value of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_SCALAR, + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS +template +GArrowScalar * +garrow_union_scalar_new(GArrowDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_value = garrow_scalar_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_value, type_code, + arrow_data_type)); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + return scalar; +} +G_BEGIN_DECLS + +/** + * garrow_union_scalar_get_type_code: + * @scalar: A #GArrowUnionScalar. + * + * Returns: The type code of this scalar. + * + * Since: 6.0.0 + */ +gint8 +garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar) +{ + const auto &arrow_scalar = + std::static_pointer_cast( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->type_code; +} + +/** + * garrow_union_scalar_get_value: + * @scalar: A #GArrowUnionScalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_union_scalar_get_value(GArrowUnionScalar *scalar) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(scalar); + return priv->value; +} + + +G_DEFINE_TYPE(GArrowSparseUnionScalar, + garrow_sparse_union_scalar, + GARROW_TYPE_UNION_SCALAR) + +static void +garrow_sparse_union_scalar_init(GArrowSparseUnionScalar *object) +{ +} + +static void +garrow_sparse_union_scalar_class_init(GArrowSparseUnionScalarClass *klass) +{ +} + +/** + * garrow_sparse_union_scalar_new: + * @data_type: A #GArrowSparseUnionDataType for this scalar. + * @type_code: The type code of this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowSparseUnionScalar. + * + * Since: 5.0.0 + */ +GArrowSparseUnionScalar * +garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + return GARROW_SPARSE_UNION_SCALAR( + garrow_union_scalar_new( + GARROW_DATA_TYPE(data_type), type_code, value)); +} + + +G_DEFINE_TYPE(GArrowDenseUnionScalar, + garrow_dense_union_scalar, + GARROW_TYPE_UNION_SCALAR) + +static void +garrow_dense_union_scalar_init(GArrowDenseUnionScalar *object) +{ +} + +static void +garrow_dense_union_scalar_class_init(GArrowDenseUnionScalarClass *klass) +{ +} + +/** + * garrow_dense_union_scalar_new: + * @data_type: A #GArrowDenseUnionDataType for this scalar. + * @type_code: The type code of this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDenseUnionScalar. 
+ * + * Since: 5.0.0 + */ +GArrowDenseUnionScalar * +garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + return GARROW_DENSE_UNION_SCALAR( + garrow_union_scalar_new( + GARROW_DATA_TYPE(data_type), type_code, value)); +} + + +G_DEFINE_TYPE(GArrowExtensionScalar, + garrow_extension_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_extension_scalar_init(GArrowExtensionScalar *object) +{ +} + +static void +garrow_extension_scalar_class_init(GArrowExtensionScalarClass *klass) +{ +} + + +G_END_DECLS + +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr *arrow_scalar) +{ + return garrow_scalar_new_raw(arrow_scalar, + "scalar", arrow_scalar, + NULL); +} + +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr *arrow_scalar, + const gchar *first_property_name, + ...) +{ + va_list args; + va_start(args, first_property_name); + auto array = garrow_scalar_new_raw_valist(arrow_scalar, + first_property_name, + args); + va_end(args); + return array; +} + +GArrowScalar * +garrow_scalar_new_raw_valist(std::shared_ptr *arrow_scalar, + const gchar *first_property_name, + va_list args) +{ + GType type; + GArrowScalar *scalar; + + switch ((*arrow_scalar)->type->id()) { + case arrow::Type::type::NA: + type = GARROW_TYPE_NULL_SCALAR; + break; + case arrow::Type::type::BOOL: + type = GARROW_TYPE_BOOLEAN_SCALAR; + break; + case arrow::Type::type::INT8: + type = GARROW_TYPE_INT8_SCALAR; + break; + case arrow::Type::type::INT16: + type = GARROW_TYPE_INT16_SCALAR; + break; + case arrow::Type::type::INT32: + type = GARROW_TYPE_INT32_SCALAR; + break; + case arrow::Type::type::INT64: + type = GARROW_TYPE_INT64_SCALAR; + break; + case arrow::Type::type::UINT8: + type = GARROW_TYPE_UINT8_SCALAR; + break; + case arrow::Type::type::UINT16: + type = GARROW_TYPE_UINT16_SCALAR; + break; + case arrow::Type::type::UINT32: + type = GARROW_TYPE_UINT32_SCALAR; + break; + case arrow::Type::type::UINT64: + type = GARROW_TYPE_UINT64_SCALAR; + 
break; + case arrow::Type::type::FLOAT: + type = GARROW_TYPE_FLOAT_SCALAR; + break; + case arrow::Type::type::DOUBLE: + type = GARROW_TYPE_DOUBLE_SCALAR; + break; + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_SCALAR; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_SCALAR; + break; + case arrow::Type::type::LARGE_BINARY: + type = GARROW_TYPE_LARGE_BINARY_SCALAR; + break; + case arrow::Type::type::LARGE_STRING: + type = GARROW_TYPE_LARGE_STRING_SCALAR; + break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR; + break; + case arrow::Type::type::DATE32: + type = GARROW_TYPE_DATE32_SCALAR; + break; + case arrow::Type::type::DATE64: + type = GARROW_TYPE_DATE64_SCALAR; + break; + case arrow::Type::type::TIME32: + type = GARROW_TYPE_TIME32_SCALAR; + break; + case arrow::Type::type::TIME64: + type = GARROW_TYPE_TIME64_SCALAR; + break; + case arrow::Type::type::TIMESTAMP: + type = GARROW_TYPE_TIMESTAMP_SCALAR; + break; + case arrow::Type::type::DECIMAL128: + type = GARROW_TYPE_DECIMAL128_SCALAR; + break; + case arrow::Type::type::DECIMAL256: + type = GARROW_TYPE_DECIMAL256_SCALAR; + break; + case arrow::Type::type::LIST: + type = GARROW_TYPE_LIST_SCALAR; + break; + case arrow::Type::type::LARGE_LIST: + type = GARROW_TYPE_LARGE_LIST_SCALAR; + break; +/* + case arrow::Type::type::FIXED_SIZE_LIST: + type = GARROW_TYPE_FIXED_SIZE_LIST_SCALAR; + break; +*/ + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_SCALAR; + break; + case arrow::Type::type::STRUCT: + type = GARROW_TYPE_STRUCT_SCALAR; + break; + case arrow::Type::type::SPARSE_UNION: + type = GARROW_TYPE_SPARSE_UNION_SCALAR; + break; + case arrow::Type::type::DENSE_UNION: + type = GARROW_TYPE_DENSE_UNION_SCALAR; + break; + case arrow::Type::type::EXTENSION: + type = GARROW_TYPE_EXTENSION_SCALAR; + break; + default: + type = GARROW_TYPE_SCALAR; + break; + } + scalar = GARROW_SCALAR(g_object_new_valist(type, + first_property_name, + args)); 
+ return scalar; +} + +std::shared_ptr +garrow_scalar_get_raw(GArrowScalar *scalar) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(scalar); + return priv->scalar; +} diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h new file mode 100644 index 00000000000..a110d1c5ef6 --- /dev/null +++ b/c_glib/arrow-glib/scalar.h @@ -0,0 +1,683 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include + +G_BEGIN_DECLS + +typedef struct _GArrowCastOptions GArrowCastOptions; + +#define GARROW_TYPE_SCALAR (garrow_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalar, + garrow_scalar, + GARROW, + SCALAR, + GObject) +struct _GArrowScalarClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_scalar_parse(GArrowDataType *data_type, + const guint8 *data, + gsize size, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GArrowDataType * +garrow_scalar_get_data_type(GArrowScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_is_valid(GArrowScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_equal(GArrowScalar *scalar, + GArrowScalar *other_scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_equal_options(GArrowScalar *scalar, + GArrowScalar *other_scalar, + GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_5_0 +gchar * +garrow_scalar_to_string(GArrowScalar *scalar); + +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_scalar_cast(GArrowScalar *scalar, + GArrowDataType *data_type, + GArrowCastOptions *options, + GError **error); + + +#define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNullScalar, + garrow_null_scalar, + GARROW, + NULL_SCALAR, + GArrowScalar) +struct _GArrowNullScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowNullScalar * +garrow_null_scalar_new(void); + + +#define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBooleanScalar, + garrow_boolean_scalar, + GARROW, + BOOLEAN_SCALAR, + GArrowScalar) +struct _GArrowBooleanScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBooleanScalar * +garrow_boolean_scalar_new(gboolean value); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar); + + +#define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowInt8Scalar, + garrow_int8_scalar, + GARROW, + INT8_SCALAR, + GArrowScalar) +struct _GArrowInt8ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt8Scalar * +garrow_int8_scalar_new(gint8 value); +GARROW_AVAILABLE_IN_5_0 +gint8 +garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar); + + +#define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt16Scalar, + garrow_int16_scalar, + GARROW, + INT16_SCALAR, + GArrowScalar) +struct _GArrowInt16ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt16Scalar * +garrow_int16_scalar_new(gint16 value); +GARROW_AVAILABLE_IN_5_0 +gint16 +garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar); + + +#define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt32Scalar, + garrow_int32_scalar, + GARROW, + INT32_SCALAR, + GArrowScalar) +struct _GArrowInt32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt32Scalar * +garrow_int32_scalar_new(gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar); + + +#define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt64Scalar, + garrow_int64_scalar, + GARROW, + INT64_SCALAR, + GArrowScalar) +struct _GArrowInt64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt64Scalar * +garrow_int64_scalar_new(gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar); + + +#define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt8Scalar, + garrow_uint8_scalar, + GARROW, + UINT8_SCALAR, + GArrowScalar) +struct _GArrowUInt8ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt8Scalar * +garrow_uint8_scalar_new(guint8 value); 
+GARROW_AVAILABLE_IN_5_0 +guint8 +garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar); + + +#define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt16Scalar, + garrow_uint16_scalar, + GARROW, + UINT16_SCALAR, + GArrowScalar) +struct _GArrowUInt16ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt16Scalar * +garrow_uint16_scalar_new(guint16 value); +GARROW_AVAILABLE_IN_5_0 +guint16 +garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar); + + +#define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt32Scalar, + garrow_uint32_scalar, + GARROW, + UINT32_SCALAR, + GArrowScalar) +struct _GArrowUInt32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt32Scalar * +garrow_uint32_scalar_new(guint32 value); +GARROW_AVAILABLE_IN_5_0 +guint32 +garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar); + + +#define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt64Scalar, + garrow_uint64_scalar, + GARROW, + UINT64_SCALAR, + GArrowScalar) +struct _GArrowUInt64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt64Scalar * +garrow_uint64_scalar_new(guint64 value); +GARROW_AVAILABLE_IN_5_0 +guint64 +garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar); + + +#define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFloatScalar, + garrow_float_scalar, + GARROW, + FLOAT_SCALAR, + GArrowScalar) +struct _GArrowFloatScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowFloatScalar * +garrow_float_scalar_new(gfloat value); +GARROW_AVAILABLE_IN_5_0 +gfloat +garrow_float_scalar_get_value(GArrowFloatScalar *scalar); + + +#define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDoubleScalar, + 
garrow_double_scalar, + GARROW, + DOUBLE_SCALAR, + GArrowScalar) +struct _GArrowDoubleScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDoubleScalar * +garrow_double_scalar_new(gdouble value); +GARROW_AVAILABLE_IN_5_0 +gdouble +garrow_double_scalar_get_value(GArrowDoubleScalar *scalar); + + +#define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar, + garrow_base_binary_scalar, + GARROW, + BASE_BINARY_SCALAR, + GArrowScalar) +struct _GArrowBaseBinaryScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBuffer * +garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar); + + +#define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryScalar, + garrow_binary_scalar, + GARROW, + BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBinaryScalar * +garrow_binary_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStringScalar, + garrow_string_scalar, + GARROW, + STRING_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowStringScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowStringScalar * +garrow_string_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_LARGE_BINARY_SCALAR (garrow_large_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar, + garrow_large_binary_scalar, + GARROW, + LARGE_BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowLargeBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeBinaryScalar * +garrow_large_binary_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar, + garrow_large_string_scalar, + GARROW, + LARGE_STRING_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowLargeStringScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeStringScalar * +garrow_large_string_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR \ + (garrow_fixed_size_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar, + garrow_fixed_size_binary_scalar, + GARROW, + FIXED_SIZE_BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowFixedSizeBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowFixedSizeBinaryScalar * +garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, + GArrowBuffer *value); + + +#define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate32Scalar, + garrow_date32_scalar, + GARROW, + DATE32_SCALAR, + GArrowScalar) +struct _GArrowDate32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDate32Scalar * +garrow_date32_scalar_new(gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar); + + +#define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate64Scalar, + garrow_date64_scalar, + GARROW, + DATE64_SCALAR, + GArrowScalar) +struct _GArrowDate64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDate64Scalar * +garrow_date64_scalar_new(gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar); + + +#define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime32Scalar, + garrow_time32_scalar, + GARROW, + TIME32_SCALAR, + GArrowScalar) +struct _GArrowTime32ScalarClass +{ + GArrowScalarClass parent_class; +}; + 
+GARROW_AVAILABLE_IN_5_0 +GArrowTime32Scalar * +garrow_time32_scalar_new(GArrowTime32DataType *data_type, + gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar); + + +#define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime64Scalar, + garrow_time64_scalar, + GARROW, + TIME64_SCALAR, + GArrowScalar) +struct _GArrowTime64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTime64Scalar * +garrow_time64_scalar_new(GArrowTime64DataType *data_type, + gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar); + + +#define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimestampScalar, + garrow_timestamp_scalar, + GARROW, + TIMESTAMP_SCALAR, + GArrowScalar) +struct _GArrowTimestampScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTimestampScalar * +garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type, + gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar); + + +#define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar, + garrow_decimal128_scalar, + GARROW, + DECIMAL128_SCALAR, + GArrowScalar) +struct _GArrowDecimal128ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal128Scalar * +garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type, + GArrowDecimal128 *value); +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal128 * +garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar); + + +#define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar, + garrow_decimal256_scalar, + GARROW, + DECIMAL256_SCALAR, + GArrowScalar) +struct _GArrowDecimal256ScalarClass +{ 
+ GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal256Scalar * +garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type, + GArrowDecimal256 *value); +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal256 * +garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar); + + +#define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBaseListScalar, + garrow_base_list_scalar, + GARROW, + BASE_LIST_SCALAR, + GArrowScalar) +struct _GArrowBaseListScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowArray * +garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar); + +#define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowListScalar, + garrow_list_scalar, + GARROW, + LIST_SCALAR, + GArrowBaseListScalar) +struct _GArrowListScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowListScalar * +garrow_list_scalar_new(GArrowListArray *value); + + +#define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar, + garrow_large_list_scalar, + GARROW, + LARGE_LIST_SCALAR, + GArrowBaseListScalar) +struct _GArrowLargeListScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeListScalar * +garrow_large_list_scalar_new(GArrowLargeListArray *value); + + +#define GARROW_TYPE_MAP_SCALAR (garrow_map_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMapScalar, + garrow_map_scalar, + GARROW, + MAP_SCALAR, + GArrowBaseListScalar) +struct _GArrowMapScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowMapScalar * +garrow_map_scalar_new(GArrowStructArray *value); + + +#define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStructScalar, + garrow_struct_scalar, + GARROW, + STRUCT_SCALAR, + GArrowScalar) +struct 
_GArrowStructScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowStructScalar * +garrow_struct_scalar_new(GArrowStructDataType *data_type, + GList *value); +GARROW_AVAILABLE_IN_5_0 +GList * +garrow_struct_scalar_get_value(GArrowStructScalar *scalar); + + +#define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUnionScalar, + garrow_union_scalar, + GARROW, + UNION_SCALAR, + GArrowScalar) +struct _GArrowUnionScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +gint8 +garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_union_scalar_get_value(GArrowUnionScalar *scalar); + + +#define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar, + garrow_sparse_union_scalar, + GARROW, + SPARSE_UNION_SCALAR, + GArrowUnionScalar) +struct _GArrowSparseUnionScalarClass +{ + GArrowUnionScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowSparseUnionScalar * +garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value); + + +#define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar, + garrow_dense_union_scalar, + GARROW, + DENSE_UNION_SCALAR, + GArrowUnionScalar) +struct _GArrowDenseUnionScalarClass +{ + GArrowUnionScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDenseUnionScalar * +garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value); + + +#define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExtensionScalar, + garrow_extension_scalar, + GARROW, + EXTENSION_SCALAR, + GArrowScalar) +struct _GArrowExtensionScalarClass +{ + GArrowScalarClass parent_class; +}; + +G_END_DECLS diff --git 
a/c_glib/arrow-glib/scalar.hpp b/c_glib/arrow-glib/scalar.hpp new file mode 100644 index 00000000000..46ac73e21e8 --- /dev/null +++ b/c_glib/arrow-glib/scalar.hpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include + +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr *arrow_scalar); +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr *arrow_scalar, + const gchar *first_property_name, + ...); +GArrowScalar * +garrow_scalar_new_raw_valist(std::shared_ptr *arrow_scalar, + const gchar *first_property_name, + va_list args); +std::shared_ptr +garrow_scalar_get_raw(GArrowScalar *scalar); diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index 5a74566fd4a..193853602ff 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -110,6 +110,24 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif +/** + * GARROW_VERSION_6_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 6.0.0 + */ +#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0) + +/** + * GARROW_VERSION_5_0: + * + * You can use this macro value for compile time API version check. 
+ * + * Since: 5.0.0 + */ +#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0) + /** * GARROW_VERSION_4_0: * @@ -256,6 +274,34 @@ #define GARROW_AVAILABLE_IN_ALL +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0 +# define GARROW_DEPRECATED_IN_6_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_6_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_6_0 +# define GARROW_DEPRECATED_IN_6_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0 +# define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0) +#else +# define GARROW_AVAILABLE_IN_6_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0 +# define GARROW_DEPRECATED_IN_5_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_5_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_5_0 +# define GARROW_DEPRECATED_IN_5_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0 +# define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0) +#else +# define GARROW_AVAILABLE_IN_5_0 +#endif + #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0 # define GARROW_DEPRECATED_IN_4_0 GARROW_DEPRECATED # define GARROW_DEPRECATED_IN_4_0_FOR(function) GARROW_DEPRECATED_FOR(function) diff --git a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml index 92ae0405dac..3e8da5bd9d1 100644 --- a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml +++ b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml @@ -36,13 +36,21 @@ - - Read - + + Data + + Dataset + + Dataset factory + + + Scan Fragment + File format + @@ -58,9 +66,17 @@ Index of deprecated API - - Index of new symbols in 1.0.0 - + + Index of new symbols in 5.0.0 + + + + Index of new symbols in 4.0.0 + + + + Index of new symbols in 3.0.0 + diff --git a/c_glib/doc/arrow-dataset-glib/meson.build b/c_glib/doc/arrow-dataset-glib/meson.build index 1cb2f9e99c8..ca037b7e36a 100644 ---
a/c_glib/doc/arrow-dataset-glib/meson.build +++ b/c_glib/doc/arrow-dataset-glib/meson.build @@ -70,7 +70,7 @@ gnome.gtkdoc(package_id, ], mkdb_args: [ '--output-format=xml', - '--name-space=gad', + '--name-space=gadataset', '--source-suffixes=c,cpp,h', ], fixxref_args: [ diff --git a/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml new file mode 100644 index 00000000000..397a8bec0d0 --- /dev/null +++ b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml @@ -0,0 +1,67 @@ + + + + + %gtkdocentities; +]> + + + &package_name; Reference Manual + + for &package_string;. + + + + + + RPC + + + + + + + Object Hierarchy + + + + API Index + + + + Index of deprecated API + + + + Index of new symbols in 6.0.0 + + + + Index of new symbols in 5.0.0 + + + + diff --git a/c_glib/doc/arrow-flight-glib/entities.xml.in b/c_glib/doc/arrow-flight-glib/entities.xml.in new file mode 100644 index 00000000000..aa5addb4e84 --- /dev/null +++ b/c_glib/doc/arrow-flight-glib/entities.xml.in @@ -0,0 +1,24 @@ + + + + + + + diff --git a/c_glib/doc/arrow-flight-glib/meson.build b/c_glib/doc/arrow-flight-glib/meson.build new file mode 100644 index 00000000000..7ae38e4f5e4 --- /dev/null +++ b/c_glib/doc/arrow-flight-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package_id = 'arrow-flight-glib' +package_name = 'Apache Arrow Flight GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_id, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + arrow_glib, + arrow_flight_glib, +] +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GARROW_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=gaflight', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, 
'--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml index 9198b6a13a6..4c061c06c40 100644 --- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml +++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml @@ -55,6 +55,10 @@ Value + + Scalar + + Type @@ -179,6 +183,14 @@ Index of deprecated API + + Index of new symbols in 6.0.0 + + + + Index of new symbols in 5.0.0 + + Index of new symbols in 4.0.0 diff --git a/c_glib/meson.build b/c_glib/meson.build index 4ac407e97d2..0e090c97968 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp', 'cpp_std=c++11', ]) -version = '4.0.0-SNAPSHOT' +version = '6.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] @@ -75,6 +75,7 @@ if arrow_cpp_build_lib_dir == '' have_arrow_orc = dependency('arrow-orc', required: false).found() arrow_cuda = dependency('arrow-cuda', required: false) arrow_dataset = dependency('arrow-dataset', required: false) + arrow_flight = dependency('arrow-flight', required: false) gandiva = dependency('gandiva', required: false) parquet = dependency('parquet', required: false) plasma = dependency('plasma', required: false) @@ -105,6 +106,9 @@ main(void) arrow_dataset = cpp_compiler.find_library('arrow_dataset', dirs: [arrow_cpp_build_lib_dir], required: false) + arrow_flight = cpp_compiler.find_library('arrow_flight', + dirs: [arrow_cpp_build_lib_dir], + required: false) gandiva = cpp_compiler.find_library('gandiva', dirs: [arrow_cpp_build_lib_dir], required: false) @@ -137,6 +141,9 @@ endif if arrow_dataset.found() subdir('arrow-dataset-glib') endif +if arrow_flight.found() + subdir('arrow-flight-glib') +endif if gandiva.found() subdir('gandiva-glib') 
endif @@ -153,6 +160,9 @@ if get_option('gtk_doc') if arrow_dataset.found() subdir('doc/arrow-dataset-glib') endif + if arrow_flight.found() + subdir('doc/arrow-flight-glib') + endif if gandiva.found() subdir('doc/gandiva-glib') endif diff --git a/c_glib/plasma-glib/client.cpp b/c_glib/plasma-glib/client.cpp index 2a5ccf98bd1..26476f4d6b5 100644 --- a/c_glib/plasma-glib/client.cpp +++ b/c_glib/plasma-glib/client.cpp @@ -265,6 +265,10 @@ gplasma_client_create_options_new(void) return GPLASMA_CLIENT_CREATE_OPTIONS(options); } +#if !GLIB_CHECK_VERSION(2, 68, 0) +# define g_memdup2(memory, byte_size) g_memdup(memory, byte_size) +#endif + /** * gplasma_client_create_options_set_metadata: * @options: A #GPlasmaClientCreateOptions. @@ -282,7 +286,7 @@ gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options, if (priv->metadata) { g_free(priv->metadata); } - priv->metadata = static_cast(g_memdup(metadata, size)); + priv->metadata = static_cast(g_memdup2(metadata, size)); priv->metadata_size = size; } diff --git a/c_glib/test/dataset/test-file-system-dataset-factory.rb b/c_glib/test/dataset/test-file-system-dataset-factory.rb new file mode 100644 index 00000000000..9ef629c222e --- /dev/null +++ b/c_glib/test/dataset/test-file-system-dataset-factory.rb @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetFileSystemDatasetFactory < Test::Unit::TestCase + include Helper::Buildable + include Helper::Writable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + @path = File.join(@dir, "table.arrow") + @table = build_table(visible: [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false, true]), + ], + point: [ + build_int32_array([1, 2, 3]), + build_int32_array([-1, -2, -3, -4]), + ]) + @format = ArrowDataset::IPCFileFormat.new + write_table(@table, @path) + yield + end + end + + def test_file_system + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system = Arrow::LocalFileSystem.new + factory.add_path(File.expand_path(@path)) + dataset = factory.finish + assert_equal(@table, dataset.to_table) + end + + def test_file_system_uri + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(@path) + dataset = factory.finish + assert_equal(@table, dataset.to_table) + end +end diff --git a/c_glib/test/dataset/test-file-system-dataset.rb b/c_glib/test/dataset/test-file-system-dataset.rb new file mode 100644 index 00000000000..6d6ec3b18c6 --- /dev/null +++ b/c_glib/test/dataset/test-file-system-dataset.rb @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetFileSystemDataset < Test::Unit::TestCase + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + format = ArrowDataset::IPCFileFormat.new + factory = ArrowDataset::FileSystemDatasetFactory.new(format) + factory.file_system = Arrow::LocalFileSystem.new + @dataset = factory.finish + yield + end + end + + def test_type_name + assert_equal("filesystem", @dataset.type_name) + end +end diff --git a/c_glib/test/dataset/test-in-memory-scan-task.rb b/c_glib/test/dataset/test-in-memory-scan-task.rb deleted file mode 100644 index 06e3d0d2424..00000000000 --- a/c_glib/test/dataset/test-in-memory-scan-task.rb +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -class TestDatasetInMemoryScanTask < Test::Unit::TestCase - include Helper::Buildable - - def setup - omit("Arrow Dataset is required") unless defined?(ArrowDataset) - fields = [ - Arrow::Field.new("visible", Arrow::BooleanDataType.new), - Arrow::Field.new("point", Arrow::Int32DataType.new), - ] - @schema = Arrow::Schema.new(fields) - @record_batches = [ - [ - build_boolean_array([true, false, true]), - build_int32_array([1, 2, 3]), - ], - [ - build_boolean_array([false, true, false, true]), - build_int32_array([-1, -2, -3, -4]), - ] - ].collect do |columns| - Arrow::RecordBatch.new(@schema, columns[0].length, columns) - end - - @scan_options = ArrowDataset::ScanOptions.new(@schema) - - @fragment = ArrowDataset::InMemoryFragment.new(@schema, - @record_batches) - - @scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches, - @scan_options, - @fragment) - end - - def test_scan_options - assert_equal(@scan_options, @scan_task.options) - end - - def test_execute - assert_equal(@record_batches, - @scan_task.execute.to_list) - end -end diff --git a/c_glib/test/dataset/test-scan-options.rb b/c_glib/test/dataset/test-scan-options.rb deleted file mode 100644 index 0536b2a7cca..00000000000 --- a/c_glib/test/dataset/test-scan-options.rb +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -class TestDatasetScanOptions < Test::Unit::TestCase - def setup - omit("Arrow Dataset is required") unless defined?(ArrowDataset) - @schema = Arrow::Schema.new([]) - @scan_options = ArrowDataset::ScanOptions.new(@schema) - end - - def test_schema - assert_equal(@schema, - @scan_options.schema) - end - - def test_batch_size - assert_equal(1<<20, - @scan_options.batch_size) - @scan_options.batch_size = 42 - assert_equal(42, - @scan_options.batch_size) - end - - def test_use_threads - assert do - not @scan_options.use_threads? - end - @scan_options.use_threads = true - assert do - @scan_options.use_threads? - end - end -end diff --git a/c_glib/test/dataset/test-scanner.rb b/c_glib/test/dataset/test-scanner.rb new file mode 100644 index 00000000000..f7702d4905f --- /dev/null +++ b/c_glib/test/dataset/test-scanner.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDatasetScanner < Test::Unit::TestCase + include Helper::Buildable + include Helper::Writable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + path = File.join(tmpdir, "table.arrow") + @table = build_table(visible: [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false, true]), + ], + point: [ + build_int32_array([1, 2, 3]), + build_int32_array([-1, -2, -3, -4]), + ]) + @format = ArrowDataset::IPCFileFormat.new + write_table(@table, path) + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(path) + @dataset = factory.finish + builder = @dataset.begin_scan + @scanner = builder.finish + yield + end + end + + def test_to_table + assert_equal(@table, @scanner.to_table) + end +end diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb new file mode 100644 index 00000000000..f6660a4ca49 --- /dev/null +++ b/c_glib/test/flight/test-client.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFlightClient < Test::Unit::TestCase + include Helper::Omittable + + def setup + @server = nil + omit("Arrow Flight is required") unless defined?(ArrowFlight) + omit("Unstable on Windows") if Gem.win_platform? + require_gi_bindings(3, 4, 7) + @server = Helper::FlightServer.new + host = "127.0.0.1" + location = ArrowFlight::Location.new("grpc://#{host}:0") + options = ArrowFlight::ServerOptions.new(location) + @server.listen(options) + @location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}") + end + + def teardown + return if @server.nil? + @server.shutdown + end + + def test_list_flights + client = ArrowFlight::Client.new(@location) + generator = Helper::FlightInfoGenerator.new + assert_equal([generator.page_view], + client.list_flights) + end + + sub_test_case("#do_get") do + def test_success + client = ArrowFlight::Client.new(@location) + info = client.list_flights.first + endpoint = info.endpoints.first + generator = Helper::FlightInfoGenerator.new + reader = client.do_get(endpoint.ticket) + assert_equal(generator.page_view_table, + reader.read_all) + end + + def test_error + client = ArrowFlight::Client.new(@location) + assert_raise(Arrow::Error::Invalid) do + client.do_get(ArrowFlight::Ticket.new("invalid")) + end + end + end +end diff --git a/c_glib/test/flight/test-command-descriptor.rb b/c_glib/test/flight/test-command-descriptor.rb new file mode 100644 index 00000000000..316973287f0 --- /dev/null +++ b/c_glib/test/flight/test-command-descriptor.rb @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightCommandDescriptor < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_to_s + descriptor = ArrowFlight::CommandDescriptor.new("command") + assert_equal("FlightDescriptor", + descriptor.to_s) + end + + def test_command + command = "command" + descriptor = ArrowFlight::CommandDescriptor.new(command) + assert_equal(command, descriptor.command) + end + + sub_test_case("#==") do + def test_true + descriptor1 = ArrowFlight::CommandDescriptor.new("command") + descriptor2 = ArrowFlight::CommandDescriptor.new("command") + assert do + descriptor1 == descriptor2 + end + end + + def test_false + descriptor1 = ArrowFlight::CommandDescriptor.new("command1") + descriptor2 = ArrowFlight::CommandDescriptor.new("command2") + assert do + not (descriptor1 == descriptor2) + end + end + end +end diff --git a/c_glib/test/flight/test-criteria.rb b/c_glib/test/flight/test-criteria.rb new file mode 100644 index 00000000000..d5f60a8953d --- /dev/null +++ b/c_glib/test/flight/test-criteria.rb @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightCriteria < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_expression + expression = "expression" + criteria = ArrowFlight::Criteria.new(expression) + assert_equal(expression, + criteria.expression.to_s) + end +end diff --git a/c_glib/test/flight/test-endpoint.rb b/c_glib/test/flight/test-endpoint.rb new file mode 100644 index 00000000000..06cddf0019b --- /dev/null +++ b/c_glib/test/flight/test-endpoint.rb @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFlightEndpoint < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_ticket + ticket = ArrowFlight::Ticket.new("data") + locations = [ + ArrowFlight::Location.new("grpc://127.0.0.1:2929"), + ArrowFlight::Location.new("grpc+tcp://127.0.0.1:12929"), + ] + endpoint = ArrowFlight::Endpoint.new(ticket, locations) + assert_equal(ticket, + endpoint.ticket) + end + + def test_locations + ticket = ArrowFlight::Ticket.new("data") + locations = [ + ArrowFlight::Location.new("grpc://127.0.0.1:2929"), + ArrowFlight::Location.new("grpc+tcp://127.0.0.1:12929"), + ] + endpoint = ArrowFlight::Endpoint.new(ticket, locations) + assert_equal(locations, + endpoint.locations) + end + + sub_test_case("#==") do + def test_true + ticket = ArrowFlight::Ticket.new("data") + location = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + endpoint1 = ArrowFlight::Endpoint.new(ticket, [location]) + endpoint2 = ArrowFlight::Endpoint.new(ticket, [location]) + assert do + endpoint1 == endpoint2 + end + end + + def test_false + ticket = ArrowFlight::Ticket.new("data") + location1 = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + location2 = ArrowFlight::Location.new("grpc://127.0.0.1:1129") + endpoint1 = ArrowFlight::Endpoint.new(ticket, [location1]) + endpoint2 = ArrowFlight::Endpoint.new(ticket, [location2]) + assert do + not (endpoint1 == endpoint2) + end + end + end +end diff --git a/c_glib/test/flight/test-info.rb b/c_glib/test/flight/test-info.rb new file mode 100644 index 00000000000..5bf0fbfad88 --- /dev/null +++ b/c_glib/test/flight/test-info.rb @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightInfo < Test::Unit::TestCase + include Helper::Writable + + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + @generator = Helper::FlightInfoGenerator.new + end + + sub_test_case("#get_schema") do + def test_with_options + info = @generator.page_view + table = @generator.page_view_table + options = Arrow::ReadOptions.new + assert_equal(table.schema, + info.get_schema(options)) + end + + def test_without_options + info = @generator.page_view + table = @generator.page_view_table + assert_equal(table.schema, + info.get_schema) + end + end + + def test_descriptor + info = @generator.page_view + assert_equal(@generator.page_view_descriptor, + info.descriptor) + end + + def test_endpoints + info = @generator.page_view + assert_equal(@generator.page_view_endpoints, + info.endpoints) + end + + def test_total_records + info = @generator.page_view + table = @generator.page_view_table + assert_equal(table.n_rows, + info.total_records) + end + + def test_total_bytes + info = @generator.page_view + table = @generator.page_view_table + output = Arrow::ResizableBuffer.new(0) + write_table(table, output, type: :stream) + assert_equal(output.size, + info.total_bytes) + end + + def test_equal + info1 = @generator.page_view + info2 = @generator.page_view + assert do + info1 == info2 + end + end +end diff --git a/c_glib/test/flight/test-location.rb 
b/c_glib/test/flight/test-location.rb new file mode 100644 index 00000000000..5b167932218 --- /dev/null +++ b/c_glib/test/flight/test-location.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightLocation < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_to_s + location = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + assert_equal("grpc://127.0.0.1:2929", location.to_s) + end + + def test_scheme + location = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + assert_equal("grpc", location.scheme) + end + + def test_equal + location1 = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + location2 = ArrowFlight::Location.new("grpc://127.0.0.1:2929") + assert do + location1 == location2 + end + end +end diff --git a/c_glib/test/flight/test-path-descriptor.rb b/c_glib/test/flight/test-path-descriptor.rb new file mode 100644 index 00000000000..441fc7bb043 --- /dev/null +++ b/c_glib/test/flight/test-path-descriptor.rb @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightPathDescriptor < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_to_s + descriptor = ArrowFlight::PathDescriptor.new(["a", "b", "c"]) + assert_equal("FlightDescriptor", + descriptor.to_s) + end + + def test_paths + paths = ["a", "b", "c"] + descriptor = ArrowFlight::PathDescriptor.new(paths) + assert_equal(paths, descriptor.paths) + end + + sub_test_case("#==") do + def test_true + descriptor1 = ArrowFlight::PathDescriptor.new(["a", "b", "c"]) + descriptor2 = ArrowFlight::PathDescriptor.new(["a", "b", "c"]) + assert do + descriptor1 == descriptor2 + end + end + + def test_false + descriptor1 = ArrowFlight::PathDescriptor.new(["a", "b", "c"]) + descriptor2 = ArrowFlight::PathDescriptor.new(["A", "B", "C"]) + assert do + not (descriptor1 == descriptor2) + end + end + end +end diff --git a/c_glib/test/flight/test-server-options.rb b/c_glib/test/flight/test-server-options.rb new file mode 100644 index 00000000000..93a90297ea2 --- /dev/null +++ b/c_glib/test/flight/test-server-options.rb @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightServerOptions < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_location + location = ArrowFlight::Location.new("grpc://127.0.0.1:0") + options = ArrowFlight::ServerOptions.new(location) + assert_equal(location, options.location) + end +end diff --git a/c_glib/test/flight/test-stream-reader.rb b/c_glib/test/flight/test-stream-reader.rb new file mode 100644 index 00000000000..f2e6229b0b3 --- /dev/null +++ b/c_glib/test/flight/test-stream-reader.rb @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightStreamReader < Test::Unit::TestCase + include Helper::Omittable + + def setup + @server = nil + omit("Arrow Flight is required") unless defined?(ArrowFlight) + omit("Unstable on Windows") if Gem.win_platform? + require_gi_bindings(3, 4, 5) + @server = Helper::FlightServer.new + host = "127.0.0.1" + location = ArrowFlight::Location.new("grpc://#{host}:0") + options = ArrowFlight::ServerOptions.new(location) + @server.listen(options) + location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}") + client = ArrowFlight::Client.new(location) + @generator = Helper::FlightInfoGenerator.new + @reader = client.do_get(@generator.page_view_ticket) + end + + def teardown + return if @server.nil? + @server.shutdown + end + + def test_read_next + chunks = [] + loop do + chunk = @reader.read_next + break if chunk.nil? + chunks << chunk + end + chunks_content = chunks.collect do |chunk| + [ + chunk.data, + chunk.metadata&.data&.to_s, + ] + end + table_batch_reader = Arrow::TableBatchReader.new(@generator.page_view_table) + assert_equal([ + [ + table_batch_reader.read_next, + nil, + ], + ], + chunks_content) + end + + def test_read_all + assert_equal(@generator.page_view_table, + @reader.read_all) + end +end diff --git a/c_glib/test/flight/test-ticket.rb b/c_glib/test/flight/test-ticket.rb new file mode 100644 index 00000000000..976089762f0 --- /dev/null +++ b/c_glib/test/flight/test-ticket.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFlightTicket < Test::Unit::TestCase + def setup + omit("Arrow Flight is required") unless defined?(ArrowFlight) + end + + def test_data + data = "data" + ticket = ArrowFlight::Ticket.new(data) + assert_equal(data, + ticket.data.to_s) + end + + sub_test_case("#==") do + def test_true + ticket1 = ArrowFlight::Ticket.new("data") + ticket2 = ArrowFlight::Ticket.new("data") + assert do + ticket1 == ticket2 + end + end + + def test_false + ticket1 = ArrowFlight::Ticket.new("data1") + ticket2 = ArrowFlight::Ticket.new("data2") + assert do + not (ticket1 == ticket2) + end + end + end +end diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb index 3528c2fbdc7..356fa651c6a 100644 --- a/c_glib/test/helper/buildable.rb +++ b/c_glib/test/helper/buildable.rb @@ -136,11 +136,7 @@ def build_list_array(value_data_type, values_list, field_name: "value") data_type = Arrow::ListDataType.new(value_field) builder = Arrow::ListArrayBuilder.new(data_type) values_list.each do |values| - if values.nil? - builder.append_null - else - append_to_builder(builder, values) - end + append_to_builder(builder, values) end builder.finish end @@ -150,11 +146,16 @@ def build_large_list_array(value_data_type, values_list, field_name: "value") data_type = Arrow::LargeListDataType.new(value_field) builder = Arrow::LargeListArrayBuilder.new(data_type) values_list.each do |values| - if values.nil? 
- builder.append_null - else - append_to_builder(builder, values) - end + append_to_builder(builder, values) + end + builder.finish + end + + def build_map_array(key_data_type, item_data_type, maps) + data_type = Arrow::MapDataType.new(key_data_type, item_data_type) + builder = Arrow::MapArrayBuilder.new(data_type) + maps.each do |map| + append_to_builder(builder, map) end builder.finish end @@ -163,11 +164,7 @@ def build_struct_array(fields, structs) data_type = Arrow::StructDataType.new(fields) builder = Arrow::StructArrayBuilder.new(data_type) structs.each do |struct| - if struct.nil? - builder.append_null - else - append_to_builder(builder, struct) - end + append_to_builder(builder, struct) end builder.finish end @@ -178,6 +175,14 @@ def append_to_builder(builder, value) else data_type = builder.value_data_type case data_type + when Arrow::MapDataType + builder.append_value + key_builder = builder.key_builder + item_builder = builder.item_builder + value.each do |k, v| + append_to_builder(key_builder, k) + append_to_builder(item_builder, v) + end when Arrow::ListDataType, Arrow::LargeListDataType builder.append_value value_builder = builder.value_builder @@ -200,7 +205,15 @@ def append_to_builder(builder, value) def build_table(columns) fields = [] chunked_arrays = [] - columns.each do |name, chunked_array| + columns.each do |name, data| + case data + when Arrow::Array + chunked_array = Arrow::ChunkedArray.new([data]) + when Array + chunked_array = Arrow::ChunkedArray.new(data) + else + chunked_array = data + end fields << Arrow::Field.new(name, chunked_array.value_data_type) chunked_arrays << chunked_array end @@ -217,6 +230,15 @@ def build_record_batch(columns) Arrow::RecordBatch.new(schema, n_rows, columns.values) end + def build_file_uri(path) + absolute_path = File.expand_path(path) + if absolute_path.start_with?("/") + "file://#{absolute_path}" + else + "file:///#{absolute_path}" + end + end + private def build_array(builder, values) values.each do 
|value| diff --git a/c_glib/test/helper/data-type.rb b/c_glib/test/helper/data-type.rb index b8224409873..bbe6866f5b9 100644 --- a/c_glib/test/helper/data-type.rb +++ b/c_glib/test/helper/data-type.rb @@ -52,6 +52,22 @@ def int64_data_type Arrow::Int64DataType.new end + def uint8_data_type + Arrow::UInt8DataType.new + end + + def uint16_data_type + Arrow::UInt16DataType.new + end + + def uint32_data_type + Arrow::UInt32DataType.new + end + + def uint64_data_type + Arrow::UInt64DataType.new + end + def string_data_type Arrow::StringDataType.new end diff --git a/c_glib/test/helper/flight-info-generator.rb b/c_glib/test/helper/flight-info-generator.rb new file mode 100644 index 00000000000..c57530879cb --- /dev/null +++ b/c_glib/test/helper/flight-info-generator.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "buildable" +require_relative "data-type" +require_relative "writable" + +module Helper + class FlightInfoGenerator + include Buildable + include DataType + include Writable + + def page_view_table + build_table("count" => build_uint64_array([1, 2, 3]), + "private" => build_boolean_array([true, false, true])) + end + + def page_view_descriptor + ArrowFlight::PathDescriptor.new(["page-view"]) + end + + def page_view_ticket + ArrowFlight::Ticket.new("page-view") + end + + def page_view_endpoints + locations = [ + ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10000"), + ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10001"), + ] + [ + ArrowFlight::Endpoint.new(page_view_ticket, locations), + ] + end + + def page_view + table = page_view_table + descriptor = page_view_descriptor + endpoints = page_view_endpoints + output = Arrow::ResizableBuffer.new(0) + write_table(table, output, type: :stream) + ArrowFlight::Info.new(table.schema, + descriptor, + endpoints, + table.n_rows, + output.size) + end + end +end diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb new file mode 100644 index 00000000000..89fd13b4211 --- /dev/null +++ b/c_glib/test/helper/flight-server.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +require_relative "flight-info-generator" + +module Helper + class FlightServer < ArrowFlight::Server + type_register + + private + def virtual_do_list_flights(context, criteria) + generator = FlightInfoGenerator.new + [generator.page_view] + end + + def virtual_do_do_get(context, ticket) + generator = FlightInfoGenerator.new + unless ticket == generator.page_view_ticket + raise Arrow::Error::Invalid.new("invalid ticket") + end + table = generator.page_view_table + reader = Arrow::TableBatchReader.new(table) + ArrowFlight::RecordBatchStream.new(reader) + end + end +end diff --git a/c_glib/test/helper/writable.rb b/c_glib/test/helper/writable.rb new file mode 100644 index 00000000000..1c8db756c38 --- /dev/null +++ b/c_glib/test/helper/writable.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module Helper + module Writable + def write_table(table, output, type: :file) + if output.is_a?(Arrow::Buffer) + output_stream = Arrow::BufferOutputStream.new(output) + else + output_stream = Arrow::FileOutputStream.new(output, false) + end + begin + if type == :file + writer_class = Arrow::RecordBatchFileWriter + else + writer_class = Arrow::RecordBatchStreamWriter + end + writer = writer_class.new(output_stream, table.schema) + begin + writer.write_table(table) + ensure + writer.close + end + ensure + output_stream.close + end + end + end +end diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb index 7911cf44b6e..abae4e722c5 100755 --- a/c_glib/test/run-test.rb +++ b/c_glib/test/run-test.rb @@ -39,6 +39,10 @@ def initialize(data) @data = data end end + + class BooleanScalar + alias_method :value, :value? + end end begin @@ -51,6 +55,19 @@ def initialize(data) rescue GObjectIntrospection::RepositoryError::TypelibNotFound end +begin + class ArrowFlightLoader < GI::Loader + def should_unlock_gvl?(info, klass) + true + end + end + flight_module = Module.new + ArrowFlightLoader.load("ArrowFlight", flight_module) + ArrowFlight = flight_module + GObjectIntrospection::Loader.start_callback_dispatch_thread +rescue GObjectIntrospection::RepositoryError::TypelibNotFound +end + begin Gandiva = GI.load("Gandiva") rescue GObjectIntrospection::RepositoryError::TypelibNotFound @@ -74,7 +91,12 @@ def initialize(data) require_relative "helper/buildable" require_relative "helper/data-type" require_relative "helper/fixture" +if defined?(ArrowFlight) + require_relative "helper/flight-info-generator" + require_relative "helper/flight-server" +end require_relative "helper/omittable" require_relative "helper/plasma-store" +require_relative "helper/writable" exit(Test::Unit::AutoRunner.run(true, test_dir.to_s)) diff --git a/c_glib/test/run-test.sh b/c_glib/test/run-test.sh index 2120aa9f8f0..7e0901df5b5 100755 --- a/c_glib/test/run-test.sh +++ b/c_glib/test/run-test.sh @@ 
-20,7 +20,7 @@ test_dir="$(cd $(dirname $0); pwd)" build_dir="$(cd .; pwd)" -modules="arrow-glib arrow-cuda-glib arrow-dataset-glib gandiva-glib parquet-glib plasma-glib" +modules="arrow-glib arrow-cuda-glib arrow-dataset-glib arrow-flight-glib gandiva-glib parquet-glib plasma-glib" for module in ${modules}; do module_build_dir="${build_dir}/${module}" diff --git a/c_glib/test/test-array-datum.rb b/c_glib/test/test-array-datum.rb index f4bc9be7f14..623e5589ce4 100644 --- a/c_glib/test/test-array-datum.rb +++ b/c_glib/test/test-array-datum.rb @@ -35,6 +35,18 @@ def test_array_like? end end + def test_scalar? + assert do + not @datum.scalar? + end + end + + def test_value? + assert do + @datum.value? + end + end + sub_test_case("==") do def test_true assert_equal(Arrow::ArrayDatum.new(@array), diff --git a/c_glib/test/test-binary-scalar.rb b/c_glib/test/test-binary-scalar.rb new file mode 100644 index 00000000000..4efc50da080 --- /dev/null +++ b/c_glib/test/test-binary-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestBinaryScalar < Test::Unit::TestCase + def setup + @buffer = Arrow::Buffer.new("\x03\x01\x02") + @scalar = Arrow::BinaryScalar.new(@buffer) + end + + def test_data_type + assert_equal(Arrow::BinaryDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::BinaryScalar.new(@buffer), + @scalar) + end + + def test_to_s + assert_equal("\x03\x01\x02", @scalar.to_s) + end + + def test_value + assert_equal(@buffer, + @scalar.value) + end +end diff --git a/c_glib/test/test-boolean-scalar.rb b/c_glib/test/test-boolean-scalar.rb new file mode 100644 index 00000000000..f8913d6a7e4 --- /dev/null +++ b/c_glib/test/test-boolean-scalar.rb @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestBooleanScalar < Test::Unit::TestCase + def setup + @scalar = Arrow::BooleanScalar.new(true) + end + + def test_parse + assert_equal(@scalar, + Arrow::Scalar.parse(Arrow::BooleanDataType.new, + "true")) + end + + def test_data_type + assert_equal(Arrow::BooleanDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::BooleanScalar.new(true), + @scalar) + end + + def test_to_s + assert_equal("true", @scalar.to_s) + end + + def test_value + assert_equal(true, @scalar.value) + end +end diff --git a/c_glib/test/test-compare.rb b/c_glib/test/test-compare.rb deleted file mode 100644 index 2ffe39839df..00000000000 --- a/c_glib/test/test-compare.rb +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -class TestCompare < Test::Unit::TestCase - include Helper::Buildable - - def setup - @options = Arrow::CompareOptions.new - end - - sub_test_case("CompareOptions") do - def test_default_operator - assert_equal(Arrow::CompareOperator::EQUAL, - @options.operator) - end - end - - sub_test_case("operator") do - def test_equal - @options.operator = :equal - assert_equal(build_boolean_array([true, nil, false]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - - def test_not_equal - @options.operator = :not_equal - assert_equal(build_boolean_array([false, nil, true]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - - def test_greater - @options.operator = :greater - assert_equal(build_boolean_array([false, nil, true]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - - def test_greater_equal - @options.operator = :greater_equal - assert_equal(build_boolean_array([true, nil, true]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - - def test_less - @options.operator = :less - assert_equal(build_boolean_array([false, nil, false]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - - def test_less_equal - @options.operator = :less_equal - assert_equal(build_boolean_array([true, nil, false]), - build_int32_array([1, nil, 3]).compare(1, @options)) - end - end -end diff --git a/c_glib/test/test-count.rb b/c_glib/test/test-count.rb index 36390f880aa..39b6f06c4e6 100644 --- a/c_glib/test/test-count.rb +++ b/c_glib/test/test-count.rb @@ -19,27 +19,14 @@ class TestCount < Test::Unit::TestCase include Helper::Buildable include Helper::Omittable - sub_test_case("CountOptions") do - def test_default_mode - assert_equal(Arrow::CountMode::ALL, - Arrow::CountOptions.new.mode) - end - end - - sub_test_case("mode") do + sub_test_case("skip_nulls") do def test_default assert_equal(2, build_int32_array([1, nil, 3]).count) end - def test_all - options = Arrow::CountOptions.new - options.mode = :all - assert_equal(2, 
build_int32_array([1, nil, 3]).count(options)) - end - - def test_null - options = Arrow::CountOptions.new - options.mode = :null + def test_false + options = Arrow::ScalarAggregateOptions.new + options.skip_nulls = false assert_equal(1, build_int32_array([1, nil, 3]).count(options)) end end diff --git a/c_glib/test/test-date32-scalar.rb b/c_glib/test/test-date32-scalar.rb new file mode 100644 index 00000000000..ae41ebf72f5 --- /dev/null +++ b/c_glib/test/test-date32-scalar.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate32Scalar < Test::Unit::TestCase + def setup + @value = 17406 # 2017-08-28 + @scalar = Arrow::Date32Scalar.new(@value) + end + + def test_data_type + assert_equal(Arrow::Date32DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Date32Scalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("2017-08-28", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-date64-scalar.rb b/c_glib/test/test-date64-scalar.rb new file mode 100644 index 00000000000..ce39d3c2d74 --- /dev/null +++ b/c_glib/test/test-date64-scalar.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate64Scalar < Test::Unit::TestCase + def setup + @value = 1503878400000 # 2017-08-28T00:00:00Z + @scalar = Arrow::Date64Scalar.new(@value) + end + + def test_data_type + assert_equal(Arrow::Date64DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Date64Scalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("2017-08-28", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-decimal128-scalar.rb b/c_glib/test/test-decimal128-scalar.rb new file mode 100644 index 00000000000..380623a6701 --- /dev/null +++ b/c_glib/test/test-decimal128-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal128Scalar < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal128DataType.new(8, 2) + @value = Arrow::Decimal128.new("23423445") + @scalar = Arrow::Decimal128Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Decimal128Scalar.new(@data_type, @value), + @scalar) + end + + def test_to_s + assert_equal("234234.45", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-decimal256-scalar.rb b/c_glib/test/test-decimal256-scalar.rb new file mode 100644 index 00000000000..2c419940df7 --- /dev/null +++ b/c_glib/test/test-decimal256-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDecimal256Scalar < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal256DataType.new(8, 2) + @value = Arrow::Decimal256.new("23423445") + @scalar = Arrow::Decimal256Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Decimal256Scalar.new(@data_type, @value), + @scalar) + end + + def test_to_s + assert_equal("234234.45", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-dense-union-scalar.rb b/c_glib/test/test-dense-union-scalar.rb new file mode 100644 index 00000000000..ec2053b3fe9 --- /dev/null +++ b/c_glib/test/test-dense-union-scalar.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDenseUnionScalar < Test::Unit::TestCase + def setup + fields = [ + Arrow::Field.new("number", Arrow::Int8DataType.new), + Arrow::Field.new("text", Arrow::StringDataType.new), + ] + @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9]) + @type_code = 2 + @value = Arrow::Int8Scalar.new(-29) + @scalar = Arrow::DenseUnionScalar.new(@data_type, @type_code, @value) + end + + def test_type_code + assert_equal(@type_code, + @scalar.type_code) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::DenseUnionScalar.new(@data_type, @type_code, @value), + @scalar) + end + + def test_to_s + assert_equal("...", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-double-scalar.rb b/c_glib/test/test-double-scalar.rb new file mode 100644 index 00000000000..eea673b41e5 --- /dev/null +++ b/c_glib/test/test-double-scalar.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDoubleScalar < Test::Unit::TestCase + def setup + @scalar = Arrow::DoubleScalar.new(1.1) + end + + def test_data_type + assert_equal(Arrow::DoubleDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + options = Arrow::EqualOptions.new + options.approx = true + assert do + @scalar.equal_options(Arrow::DoubleScalar.new(1.1), options) + end + end + + def test_to_s + assert_equal("1.1", @scalar.to_s) + end + + def test_value + assert_in_delta(1.1, @scalar.value) + end +end diff --git a/c_glib/test/test-equal-options.rb b/c_glib/test/test-equal-options.rb new file mode 100644 index 00000000000..4ea1979a76b --- /dev/null +++ b/c_glib/test/test-equal-options.rb @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestEqualOptions < Test::Unit::TestCase + include Helper::Buildable + + sub_test_case("approx") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + not @options.approx? + end + @options.approx = true + assert do + @options.approx? + end + end + + def test_compare + array1 = build_float_array([0.01]) + array2 = build_float_array([0.010001]) + @options.approx = true + assert do + array1.equal_options(array2, @options) + end + end + end + + sub_test_case("nans-equal") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + not @options.nans_equal? 
+ end + @options.nans_equal = true + assert do + @options.nans_equal? + end + end + + def test_compare + array1 = build_float_array([0.1, Float::NAN, 0.2]) + array2 = build_float_array([0.1, Float::NAN, 0.2]) + @options.nans_equal = true + assert do + array1.equal_options(array2, @options) + end + end + end + + sub_test_case("absolute-tolerance") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + @options.absolute_tolerance < 0.001 + end + @options.absolute_tolerance = 0.001 + assert do + @options.absolute_tolerance >= 0.001 + end + end + + def test_compare + array1 = build_float_array([0.01]) + array2 = build_float_array([0.0109]) + @options.approx = true + @options.absolute_tolerance = 0.001 + assert do + array1.equal_options(array2, @options) + end + end + end +end diff --git a/c_glib/test/test-fixed-size-binary-scalar.rb b/c_glib/test/test-fixed-size-binary-scalar.rb new file mode 100644 index 00000000000..1a6f0703594 --- /dev/null +++ b/c_glib/test/test-fixed-size-binary-scalar.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFixedSizeBinaryScalar < Test::Unit::TestCase + def setup + @data_type = Arrow::FixedSizeBinaryDataType.new(3) + @buffer = Arrow::Buffer.new("\x03\x01\x02") + @scalar = Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer), + @scalar) + end + + def test_to_s + assert_equal("\x03\x01\x02", @scalar.to_s) + end + + def test_value + assert_equal(@buffer, + @scalar.value) + end +end diff --git a/c_glib/test/test-float-scalar.rb b/c_glib/test/test-float-scalar.rb new file mode 100644 index 00000000000..1b830408cbb --- /dev/null +++ b/c_glib/test/test-float-scalar.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFloatScalar < Test::Unit::TestCase + def setup + @scalar = Arrow::FloatScalar.new(1.1) + end + + def test_data_type + assert_equal(Arrow::FloatDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + options = Arrow::EqualOptions.new + options.approx = true + assert do + @scalar.equal_options(Arrow::FloatScalar.new(1.1), options) + end + end + + def test_to_s + assert_equal("1.1", @scalar.to_s) + end + + def test_value + assert_in_delta(1.1, @scalar.value) + end +end diff --git a/c_glib/test/test-function.rb b/c_glib/test/test-function.rb index 8530ea5c153..390bed5cc94 100644 --- a/c_glib/test/test-function.rb +++ b/c_glib/test/test-function.rb @@ -50,6 +50,25 @@ def test_chunked_array or_function.execute(args).value) end + def test_input_scalar + add_function = Arrow::Function.find("add") + args = [ + Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])), + Arrow::ScalarDatum.new(Arrow::Int8Scalar.new(5)), + ] + assert_equal(build_int8_array([6, 7, 8]), + add_function.execute(args).value) + end + + def test_output_scalar + sum_function = Arrow::Function.find("sum") + args = [ + Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])), + ] + assert_equal(Arrow::Int64Scalar.new(6), + sum_function.execute(args).value) + end + def test_options cast_function = Arrow::Function.find("cast") args = [ diff --git a/c_glib/test/test-int16-scalar.rb b/c_glib/test/test-int16-scalar.rb new file mode 100644 index 00000000000..1a792714079 --- /dev/null +++ b/c_glib/test/test-int16-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt16Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int16Scalar.new(-(2 ** 15)) + end + + def test_data_type + assert_equal(Arrow::Int16DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Int16Scalar.new(-(2 ** 15)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 15)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 15), @scalar.value) + end +end diff --git a/c_glib/test/test-int32-scalar.rb b/c_glib/test/test-int32-scalar.rb new file mode 100644 index 00000000000..eba554845c7 --- /dev/null +++ b/c_glib/test/test-int32-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt32Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int32Scalar.new(-(2 ** 31)) + end + + def test_data_type + assert_equal(Arrow::Int32DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Int32Scalar.new(-(2 ** 31)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 31)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 31), @scalar.value) + end +end diff --git a/c_glib/test/test-int64-scalar.rb b/c_glib/test/test-int64-scalar.rb new file mode 100644 index 00000000000..bfa7b4529e8 --- /dev/null +++ b/c_glib/test/test-int64-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt64Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int64Scalar.new(-(2 ** 63)) + end + + def test_data_type + assert_equal(Arrow::Int64DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Int64Scalar.new(-(2 ** 63)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 63)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 63), @scalar.value) + end +end diff --git a/c_glib/test/test-int8-scalar.rb b/c_glib/test/test-int8-scalar.rb new file mode 100644 index 00000000000..214c5907375 --- /dev/null +++ b/c_glib/test/test-int8-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt8Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int8Scalar.new(-128) + end + + def test_data_type + assert_equal(Arrow::Int8DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Int8Scalar.new(-128), + @scalar) + end + + def test_to_s + assert_equal("-128", @scalar.to_s) + end + + def test_value + assert_equal(-128, @scalar.value) + end +end diff --git a/c_glib/test/test-large-binary-scalar.rb b/c_glib/test/test-large-binary-scalar.rb new file mode 100644 index 00000000000..a6bc4addb10 --- /dev/null +++ b/c_glib/test/test-large-binary-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestLargeBinaryScalar < Test::Unit::TestCase + def setup + @buffer = Arrow::Buffer.new("\x03\x01\x02") + @scalar = Arrow::LargeBinaryScalar.new(@buffer) + end + + def test_data_type + assert_equal(Arrow::LargeBinaryDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::LargeBinaryScalar.new(@buffer), + @scalar) + end + + def test_to_s + assert_equal("...", @scalar.to_s) + end + + def test_value + assert_equal(@buffer, + @scalar.value) + end +end diff --git a/c_glib/test/test-large-string-scalar.rb b/c_glib/test/test-large-string-scalar.rb new file mode 100644 index 00000000000..13e28f647ac --- /dev/null +++ b/c_glib/test/test-large-string-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestLargeStringScalar < Test::Unit::TestCase + def setup + @buffer = Arrow::Buffer.new("Hello") + @scalar = Arrow::LargeStringScalar.new(@buffer) + end + + def test_data_type + assert_equal(Arrow::LargeStringDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::LargeStringScalar.new(@buffer), + @scalar) + end + + def test_to_s + assert_equal("...", @scalar.to_s) + end + + def test_value + assert_equal(@buffer, + @scalar.value) + end +end diff --git a/c_glib/test/test-list-scalar.rb b/c_glib/test/test-list-scalar.rb new file mode 100644 index 00000000000..3fda3f25bbb --- /dev/null +++ b/c_glib/test/test-list-scalar.rb @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestListScalar < Test::Unit::TestCase + include Helper::Buildable + + def setup + @value = build_list_array(Arrow::Int8DataType.new, + [[1, 2, 3]]) + @scalar = Arrow::ListScalar.new(@value) + end + + def test_data_type + assert_equal(@value.value_data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::ListScalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("...", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-map-scalar.rb b/c_glib/test/test-map-scalar.rb new file mode 100644 index 00000000000..9c6eb69e0a8 --- /dev/null +++ b/c_glib/test/test-map-scalar.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestMapScalar < Test::Unit::TestCase + include Helper::Buildable + + def setup + @value = build_struct_array([ + Arrow::Field.new("key", + Arrow::StringDataType.new, + false), + Arrow::Field.new("value", + Arrow::Int8DataType.new), + ], + [ + { + "key" => "hello", + "value" => 1, + }, + { + "key" => "world", + "value" => 2, + }, + ]) + @scalar = Arrow::MapScalar.new(@value) + end + + def test_data_type + assert_equal(@value.value_data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::MapScalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("...", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/c_glib/test/test-null-scalar.rb b/c_glib/test/test-null-scalar.rb new file mode 100644 index 00000000000..07b887040fb --- /dev/null +++ b/c_glib/test/test-null-scalar.rb @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestNullScalar < Test::Unit::TestCase + def setup + @scalar = Arrow::NullScalar.new + end + + def test_data_type + assert_equal(Arrow::NullDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + not @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::NullScalar.new, + @scalar) + end + + def test_to_s + assert_equal("null", @scalar.to_s) + end +end diff --git a/c_glib/test/test-orc-file-reader.rb b/c_glib/test/test-orc-file-reader.rb index cd57cee4de6..38900cf12f3 100644 --- a/c_glib/test/test-orc-file-reader.rb +++ b/c_glib/test/test-orc-file-reader.rb @@ -40,7 +40,7 @@ def test_read_type string1: string middle: struct>> list: list> -map: list>> +map: map> SCHEMA end @@ -80,21 +80,6 @@ def build_middle_array(middles) build_struct_array(middle_fields, middles) end - def key_value_fields - [ - Arrow::Field.new("key", Arrow::StringDataType.new), - Arrow::Field.new("value", item_data_type), - ] - end - - def key_value_data_type - Arrow::StructDataType.new(key_value_fields) - end - - def build_key_value_array(key_value_array) - build_list_array(key_value_data_type, key_value_array, field_name: "item") - end - def middle_array build_middle_array([ { @@ -154,26 +139,21 @@ def list_array end def map_array - build_key_value_array([ - [ - ], - [ - { - "key" => "chani", - "value" => { - "int1" => 5, - "string1" => "chani", - }, - }, - { - "key" => "mauddib", - "value" => { - "int1" => 1, - "string1" => "mauddib", - }, - }, - ], - ]) + build_map_array(Arrow::StringDataType.new, + item_data_type, + [ + {}, + { + "chani" => { + "int1" => 5, + "string1" => "chani", + }, + "mauddib" => { + "int1" => 1, + "string1" => "mauddib", + }, + }, + ]) end def all_columns diff --git a/c_glib/test/test-record-batch-reader.rb b/c_glib/test/test-record-batch-reader.rb new file mode 100644 index 00000000000..a41da65fd76 --- /dev/null +++ b/c_glib/test/test-record-batch-reader.rb @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestRecordBatchReader =1.5.4 +boost-cpp>=1.68.0 +brotli +bzip2 +c-ares +cmake +gflags +glog +gmock>=1.10.0 +grpc-cpp>=1.27.3 +gtest=1.10.0 +libprotobuf +libutf8proc +lz4-c +make +ninja +pkg-config +python +rapidjson +re2 +snappy +thrift-cpp>=0.11.0 +zlib +zstd diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml deleted file mode 100644 index 390eb7dcdd5..00000000000 --- a/ci/conda_env_cpp.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -aws-sdk-cpp -benchmark=1.5.2 -boost-cpp>=1.68.0 -brotli -bzip2 -c-ares -cmake -gflags -glog -gmock>=1.10.0 -grpc-cpp>=1.27.3 -gtest=1.10.0 -libprotobuf -libutf8proc -lz4-c -make -ninja -pkg-config -python -rapidjson -re2 -snappy -thrift-cpp>=0.11.0 -zlib -zstd diff --git a/ci/conda_env_gandiva.yml b/ci/conda_env_gandiva.txt similarity index 100% rename from ci/conda_env_gandiva.yml rename to ci/conda_env_gandiva.txt diff --git a/ci/conda_env_gandiva_win.yml b/ci/conda_env_gandiva_win.txt similarity index 100% rename from ci/conda_env_gandiva_win.yml rename to ci/conda_env_gandiva_win.txt diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.txt similarity index 100% rename from ci/conda_env_python.yml rename to ci/conda_env_python.txt diff --git a/ci/conda_env_r.yml b/ci/conda_env_r.txt similarity index 100% rename from ci/conda_env_r.yml rename to ci/conda_env_r.txt diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt new file mode 100644 index 00000000000..49388e2b437 --- /dev/null +++ b/ci/conda_env_sphinx.txt @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Requirements for building the documentation +breathe +doxygen +ipython +# Pinned per ARROW-9693 +sphinx=3.1.2 +pydata-sphinx-theme diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.yml deleted file mode 100644 index 8654d231065..00000000000 --- a/ci/conda_env_sphinx.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Requirements for building the documentation -breathe -doxygen -ipython -# Pinned per ARROW-9693 -sphinx=3.1.2 -sphinx_rtd_theme diff --git a/ci/conda_env_unix.yml b/ci/conda_env_unix.txt similarity index 100% rename from ci/conda_env_unix.yml rename to ci/conda_env_unix.txt diff --git a/ci/detect-changes.py b/ci/detect-changes.py index c32f6e040dd..14e71ed48ce 100644 --- a/ci/detect-changes.py +++ b/ci/detect-changes.py @@ -140,7 +140,7 @@ def list_github_actions_affected_files(): LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python', - 'r', 'ruby', 'rust', 'csharp'] + 'r', 'ruby', 'csharp'] ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev'] @@ -161,7 +161,7 @@ def list_github_actions_affected_files(): } COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js', - 'rust', 'csharp', 'go', 'docs', 'python', 'dev'} + 'csharp', 'go', 'docs', 'python', 'dev'} def get_affected_topics(affected_files): @@ -298,7 +298,6 @@ def test_get_affected_topics(): 'python': True, 'r': True, 'ruby': True, - 'rust': False, 'csharp': False, 'integration': True, 'dev': False @@ -315,7 +314,6 @@ def test_get_affected_topics(): 'python': True, 'r': True, 'ruby': True, - 'rust': True, 'csharp': True, 'integration': True, 'dev': False @@ -332,7 +330,6 @@ def test_get_affected_topics(): 'python': True, 'r': True, 'ruby': True, - 'rust': True, 'csharp': True, 'integration': True, 'dev': True, diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 1a5b87ef729..ff31930c06c 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -20,15 +20,14 @@ ARG arch FROM ${repo}:${arch}-conda # install the required conda packages into the test environment -COPY ci/conda_env_cpp.yml \ - ci/conda_env_gandiva.yml \ +COPY ci/conda_env_cpp.txt \ + ci/conda_env_gandiva.txt \ /arrow/ci/ RUN conda install \ - --file arrow/ci/conda_env_cpp.yml \ - --file arrow/ci/conda_env_gandiva.yml \ + --file arrow/ci/conda_env_cpp.txt 
\ + --file arrow/ci/conda_env_gandiva.txt \ compilers \ doxygen \ - gdb \ valgrind && \ conda clean --all diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 1f2c9ac5da2..8a7dd48b947 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -26,10 +26,9 @@ ARG jdk=8 ARG go=1.15 # Install Archery and integration dependencies -COPY ci/conda_env_archery.yml /arrow/ci/ +COPY ci/conda_env_archery.txt /arrow/ci/ RUN conda install -q \ - --file arrow/ci/conda_env_cpp.yml \ - --file arrow/ci/conda_env_archery.yml \ + --file arrow/ci/conda_env_archery.txt \ numpy \ compilers \ maven=${maven} \ diff --git a/ci/docker/conda-python-kartothek.dockerfile b/ci/docker/conda-python-kartothek.dockerfile index b1c1ed860a9..d523161822c 100644 --- a/ci/docker/conda-python-kartothek.dockerfile +++ b/ci/docker/conda-python-kartothek.dockerfile @@ -38,9 +38,7 @@ RUN conda install -c conda-forge -q \ storefact \ toolz \ urlquote \ - zstandard \ - # temporary pin for numpy (see https://issues.apache.org/jira/browse/ARROW-11472) - numpy=1.19 && \ + zstandard && \ conda clean --all ARG kartothek=latest diff --git a/ci/docker/conda-python-turbodbc.dockerfile b/ci/docker/conda-python-turbodbc.dockerfile index ff7fdf6e1d0..e748604dee3 100644 --- a/ci/docker/conda-python-turbodbc.dockerfile +++ b/ci/docker/conda-python-turbodbc.dockerfile @@ -30,7 +30,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ rm -rf /var/lib/apt/lists/* # install turbodbc dependencies from conda-forge -RUN conda install -c conda-forge -q\ +RUN conda install -c conda-forge -q \ pybind11 \ pytest-cov \ mock \ diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index a7e76974825..ab3f77be1b6 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -21,9 +21,9 @@ FROM ${repo}:${arch}-conda-cpp # install python specific packages ARG python=3.6 -COPY ci/conda_env_python.yml 
/arrow/ci/ +COPY ci/conda_env_python.txt /arrow/ci/ RUN conda install -q \ - --file arrow/ci/conda_env_python.yml \ + --file arrow/ci/conda_env_python.txt \ $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \ python=${python} \ nomkl && \ diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile index 94de009904a..2e773b5437e 100644 --- a/ci/docker/conda.dockerfile +++ b/ci/docker/conda.dockerfile @@ -25,7 +25,7 @@ ARG prefix=/opt/conda # install build essentials RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update -y -q && \ - apt-get install -y -q wget tzdata libc6-dbg \ + apt-get install -y -q wget tzdata libc6-dbg gdb \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -38,8 +38,8 @@ RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} # create a conda environment -ADD ci/conda_env_unix.yml /arrow/ci/ -RUN conda create -n arrow --file arrow/ci/conda_env_unix.yml git && \ +ADD ci/conda_env_unix.txt /arrow/ci/ +RUN conda create -n arrow --file arrow/ci/conda_env_unix.txt git && \ conda clean --all # activate the created environment by default diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index 83f8ce529cb..d99a2c161bd 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -60,7 +60,6 @@ RUN apt-get update -y -q && \ libssl-dev \ libthrift-dev \ libutf8proc-dev \ - libzstd-dev \ llvm-${llvm}-dev \ make \ ninja-build \ @@ -77,8 +76,8 @@ COPY ci/scripts/install_minio.sh \ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_FLIGHT=ON \ ARROW_GANDIVA=ON \ ARROW_HOME=/usr/local \ @@ -101,4 +100,5 @@ ENV ARROW_BUILD_TESTS=ON \ GTest_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PATH=/usr/lib/ccache/:$PATH \ - Protobuf_SOURCE=BUNDLED + 
Protobuf_SOURCE=BUNDLED \ + zstd_SOURCE=BUNDLED diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile new file mode 100644 index 00000000000..021dab686f3 --- /dev/null +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base +FROM ${base} + +# Install the libaries required by the Gandiva to run +RUN vcpkg install --clean-after-build \ + llvm \ + boost-system \ + boost-date-time \ + boost-regex \ + boost-predef \ + boost-algorithm \ + boost-locale \ + boost-format \ + boost-variant \ + boost-multiprecision + +# Install Java +ARG java=1.8.0 +RUN yum install -y java-$java-openjdk-devel && yum clean all +ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 604a05afb07..8f124a77658 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -18,9 +18,10 @@ ARG base FROM ${base} -ARG r=3.6 +ARG r=4.1 ARG jdk=8 +# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ RUN apt-get update -y && \ apt-get install -y \ dirmngr \ @@ -29,8 +30,8 @@ RUN apt-get update -y && \ apt-key adv \ --keyserver keyserver.ubuntu.com \ --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran35/' && \ - apt-get install -y \ + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ + apt-get install -y --no-install-recommends \ autoconf-archive \ automake \ curl \ @@ -43,12 +44,14 @@ RUN apt-get update -y && \ libgirepository1.0-dev \ libglib2.0-doc \ libharfbuzz-dev \ + libtiff-dev \ libtool \ libxml2-dev \ ninja-build \ nvidia-cuda-toolkit \ openjdk-${jdk}-jdk-headless \ pandoc \ + r-recommended=${r}* \ r-base=${r}* \ rsync \ ruby-dev \ @@ -72,13 +75,14 @@ RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ rm -rf /var/lib/apt/lists/* && \ npm install -g yarn -# Sphinx is pinned because of ARROW-9693 +# ARROW-13353: breathe >= 4.29.1 tries to parse template arguments, +# but Sphinx can't parse constructs like `typename...`. 
RUN pip install \ meson \ - breathe \ + breathe==4.29.0 \ ipython \ sphinx \ - sphinx_rtd_theme + pydata-sphinx-theme COPY c_glib/Gemfile /arrow/c_glib/ RUN gem install --no-document bundler && \ @@ -96,8 +100,11 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow && \ R -e "install.packages('pkgdown')" -ENV ARROW_PYTHON=ON \ +ENV ARROW_FLIGHT=ON \ + ARROW_PYTHON=ON \ + ARROW_S3=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ ARROW_USE_GLOG=OFF \ + CMAKE_UNITY_BUILD=ON \ diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile index 4be9fcc7f62..04646585322 100644 --- a/ci/docker/linux-apt-lint.dockerfile +++ b/ci/docker/linux-apt-lint.dockerfile @@ -35,9 +35,48 @@ RUN apt-get update && \ python3-dev \ python3-pip \ ruby \ + apt-transport-https \ + software-properties-common \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +ARG r=4.1 +RUN apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ + # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix + # for trusty, xenial, bionic, and eoan (as of May 2020) + # -cran40 has 4.0 versions for bionic and focal + # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial + # TODO: make sure OS version and R version are valid together and conditionally set repo suffix + # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40: + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . 
| tr 6 5 | tr 1 0)'/' && \ + apt-get install -y \ + r-base=${r}* \ + r-recommended=${r}* \ + libxml2-dev + +# Ensure parallel R package installation, set CRAN repo mirror, +# and use pre-built binaries where possible +COPY ci/etc/rprofile /arrow/ci/etc/ +RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site +# Also ensure parallel compilation of C/C++ code +RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf + + +COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ +COPY r/DESCRIPTION /arrow/r/ +# We need to install Arrow's dependencies in order for lintr's namespace searching to work. +# This could be removed if lintr no longer loads the dependency namespaces (see issues/PRs below) +RUN /arrow/ci/scripts/r_deps.sh /arrow +# This fork has a number of changes that have PRs and Issues to resolve upstream: +# https://github.com/jimhester/lintr/pull/843 +# https://github.com/jimhester/lintr/pull/841 +# https://github.com/jimhester/lintr/pull/845 +# https://github.com/jimhester/lintr/issues/842 +# https://github.com/jimhester/lintr/issues/846 +RUN R -e "remotes::install_github('jonkeane/lintr@arrow-branch')" + # Docker linter COPY --from=hadolint /bin/hadolint /usr/bin/hadolint @@ -45,25 +84,12 @@ COPY --from=hadolint /bin/hadolint /usr/bin/hadolint COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/ RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools} -# Rust linter -ARG rust=nightly-2019-09-25 -RUN curl https://sh.rustup.rs -sSf | \ - sh -s -- --default-toolchain stable -y -ENV PATH /root/.cargo/bin:$PATH -RUN rustup install ${rust} && \ - rustup default ${rust} && \ - rustup component add rustfmt - # Use python3 by default in scripts RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ ln -s /usr/bin/pip3 /usr/local/bin/pip -COPY dev/archery/requirements.txt \ - dev/archery/requirements-lint.txt \ - /arrow/dev/archery/ -RUN pip install \ - -r arrow/dev/archery/requirements.txt \ - -r 
arrow/dev/archery/requirements-lint.txt +COPY dev/archery/setup.py /arrow/dev/archery/ +RUN pip install -e arrow/dev/archery[lint] ENV LC_ALL=C.UTF-8 \ LANG=C.UTF-8 diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index f47044e334b..97029ce62ad 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -19,6 +19,9 @@ ARG base FROM ${base} ARG arch +ARG tz="UTC" +ENV TZ=${tz} + # Build R # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran @@ -36,10 +39,11 @@ RUN apt-get update -y && \ # -cran40 has 4.0 versions for bionic and focal # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial # TODO: make sure OS version and R version are valid together and conditionally set repo suffix - # This is a hack to turn 3.6 into 35 and 4.0 into 40: - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5)'/' && \ + # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40: + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . 
| tr 6 5 | tr 1 0)'/' && \ apt-get install -y \ r-base=${r}* \ + r-recommended=${r}* \ # system libs needed by core R packages libxml2-dev \ libgit2-dev \ @@ -60,6 +64,16 @@ RUN apt-get update -y && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +ARG gcc_version="" +RUN if [ "${gcc_version}" != "" ]; then \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30 && \ + update-alternatives --set cc /usr/bin/gcc && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi + # Ensure parallel R package installation, set CRAN repo mirror, # and use pre-built binaries where possible COPY ci/etc/rprofile /arrow/ci/etc/ diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index ac414829d42..a501d69955c 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -30,6 +30,9 @@ ENV ARROW_R_DEV=${r_dev} ARG devtoolset_version=-1 ENV DEVTOOLSET_VERSION=${devtoolset_version} +ARG tz="UTC" +ENV TZ=${tz} + # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) ENV PATH "${RPREFIX}/bin:${PATH}" diff --git a/ci/docker/python-wheel-manylinux-201x.dockerfile b/ci/docker/python-wheel-manylinux-201x.dockerfile index 19246a46764..ae1b0a7767c 100644 --- a/ci/docker/python-wheel-manylinux-201x.dockerfile +++ b/ci/docker/python-wheel-manylinux-201x.dockerfile @@ -58,7 +58,9 @@ RUN git clone https://github.com/microsoft/vcpkg /opt/vcpkg && \ ln -s /opt/vcpkg/vcpkg /usr/bin/vcpkg # Patch ports files as needed -COPY ci/vcpkg arrow/ci/vcpkg +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*linux*.cmake \ + arrow/ci/vcpkg/ RUN cd /opt/vcpkg && git apply --ignore-whitespace /arrow/ci/vcpkg/ports.patch ARG build_type=release diff --git 
a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile index 0f66a20396e..ebf51d75d29 100644 --- a/ci/docker/python-wheel-windows-vs2017.dockerfile +++ b/ci/docker/python-wheel-windows-vs2017.dockerfile @@ -27,14 +27,19 @@ RUN choco install --no-progress -r -y cmake --installargs 'ADD_CMAKE_TO_PATH=Sys RUN setx path "%path%;C:\Program Files\Git\usr\bin" # Install vcpkg +# +# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has +# started to ship precompiled binaries for the vcpkg-tool. ARG vcpkg RUN git clone https://github.com/Microsoft/vcpkg && \ - git -C vcpkg checkout %vcpkg% && \ - vcpkg\bootstrap-vcpkg.bat -disableMetrics -win64 && \ - setx PATH "%PATH%;C:\vcpkg" + vcpkg\bootstrap-vcpkg.bat -disableMetrics && \ + setx PATH "%PATH%;C:\vcpkg" && \ + git -C vcpkg checkout %vcpkg% # Patch ports files as needed -COPY ci/vcpkg arrow/ci/vcpkg +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*windows*.cmake \ + arrow/ci/vcpkg/ RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch # Configure vcpkg and install dependencies @@ -42,12 +47,12 @@ RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch # statements but bash notation in ENV statements # VCPKG_FORCE_SYSTEM_BINARIES=1 spare around ~750MB of image size if the system # cmake's and ninja's versions are recent enough -COPY ci/vcpkg arrow/ci/vcpkg ARG build_type=release ENV CMAKE_BUILD_TYPE=${build_type} \ VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \ VCPKG_DEFAULT_TRIPLET=x64-windows-static-md-${build_type} \ VCPKG_FEATURE_FLAGS=-manifests + RUN vcpkg install --clean-after-build \ abseil \ aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile index 4b855b52610..0c05ac4ee6b 100644 --- a/ci/docker/ubuntu-18.04-cpp.dockerfile +++ b/ci/docker/ubuntu-18.04-cpp.dockerfile @@ -69,6 +69,7 @@ RUN apt-get update 
-y -q && \ libboost-system-dev \ libbrotli-dev \ libbz2-dev \ + libc-ares-dev \ libcurl4-openssl-dev \ libgflags-dev \ libgoogle-glog-dev \ @@ -78,8 +79,6 @@ RUN apt-get update -y -q && \ libre2-dev \ libsnappy-dev \ libssl-dev \ - libutf8proc-dev \ - libzstd-dev \ ninja-build \ pkg-config \ protobuf-compiler \ @@ -96,10 +95,11 @@ RUN apt-get update -y -q && \ # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # - thrift is too old +# - utf8proc is too old(v2.1.0) # - s3 tests would require boost-asio that is included since Boost 1.66.0 ENV ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_FLIGHT=OFF \ ARROW_GANDIVA=ON \ ARROW_HDFS=ON \ @@ -122,7 +122,9 @@ ENV ARROW_BUILD_TESTS=ON \ AWSSDK_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXECUTABLES=ON \ PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ PATH=/usr/lib/ccache/:$PATH \ - Thrift_SOURCE=BUNDLED + Thrift_SOURCE=BUNDLED \ + utf8proc_SOURCE=BUNDLED \ + zstd_SOURCE=BUNDLED diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 3a37ace1381..c2a468d9e35 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -71,8 +71,9 @@ RUN apt-get update -y -q && \ libboost-system-dev \ libbrotli-dev \ libbz2-dev \ - libgflags-dev \ + libc-ares-dev \ libcurl4-openssl-dev \ + libgflags-dev \ libgoogle-glog-dev \ liblz4-dev \ libprotobuf-dev \ @@ -126,10 +127,13 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_WITH_SNAPPY=ON \ ARROW_WITH_ZLIB=ON \ ARROW_WITH_ZSTD=ON \ + ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \ AWSSDK_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ + gRPC_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ PATH=/usr/lib/ccache/:$PATH \ + Protobuf_SOURCE=BUNDLED \ PYTHON=python3 diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile 
b/ci/docker/ubuntu-20.10-cpp.dockerfile new file mode 100644 index 00000000000..6cefecfd678 --- /dev/null +++ b/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:20.10 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. 
+ARG clang_tools +ARG llvm +RUN if [ "${llvm}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ + echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + rapidjson-dev \ + tzdata \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + +# Prioritize system packages and local installation +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files 
+# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile new file mode 100644 index 00000000000..18c377811bc --- /dev/null +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base=amd64/ubuntu:20.04 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN if [ "${llvm}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ + echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + rapidjson-dev \ + tzdata \ + wget && \ + apt-get clean && \ + 
rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + +# Prioritize system packages and local installation +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files +# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 + +ARG gcc_version="" +RUN if [ "${gcc_version}" = "" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++ \ + gcc; \ + else \ + if [ "${gcc_version}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/volatile; \ + fi; \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++-${gcc_version} \ + gcc-${gcc_version} && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --set cc /usr/bin/gcc && \ 
+ update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi \ No newline at end of file diff --git a/ci/etc/rprofile b/ci/etc/rprofile index 524eb50aee0..229a0101a25 100644 --- a/ci/etc/rprofile +++ b/ci/etc/rprofile @@ -1,51 +1,53 @@ -.pick_cran <- function() { - # Return a CRAN repo URL, preferring RSPM binaries if available for this OS - rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest" - supported_os <- c("xenial", "bionic", "centos7", "opensuse42", "opensuse15") - - if (nzchar(Sys.which("lsb_release"))) { - os <- tolower(system("lsb_release -cs", intern = TRUE)) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) + local({ + .pick_cran <- function() { + # Return a CRAN repo URL, preferring RSPM binaries if available for this OS + rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest" + supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152") + + if (nzchar(Sys.which("lsb_release"))) { + os <- tolower(system("lsb_release -cs", intern = TRUE)) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } } - } - if (file.exists("/etc/os-release")) { - os_release <- readLines("/etc/os-release") - vals <- sub("^.*=(.*)$", "\\1", os_release) - os <- intersect(vals, supported_os) - if (length(os)) { - # e.g. "bionic" - return(sprintf(rspm_template, os)) - } else { - names(vals) <- sub("^(.*)=.*$", "\\1", os_release) - if (vals["ID"] == "opensuse") { - version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"]) - os <- paste0("opensuse", version) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) + if (file.exists("/etc/os-release")) { + os_release <- readLines("/etc/os-release") + vals <- sub("^.*=(.*)$", "\\1", os_release) + os <- intersect(vals, supported_os) + if (length(os)) { + # e.g. 
"bionic" + return(sprintf(rspm_template, os)) + } else { + names(vals) <- sub("^(.*)=.*$", "\\1", os_release) + if (vals["ID"] == "opensuse") { + version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"]) + os <- paste0("opensuse", version) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } } } } - } - if (file.exists("/etc/system-release")) { - # Something like "CentOS Linux release 7.7.1908 (Core)" - system_release <- tolower(utils::head(readLines("/etc/system-release"), 1)) - # Extract from that the distro and the major version number - os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release) - if (os %in% supported_os) { - return(sprintf(rspm_template, os)) + if (file.exists("/etc/system-release")) { + # Something like "CentOS Linux release 7.7.1908 (Core)" + system_release <- tolower(utils::head(readLines("/etc/system-release"), 1)) + # Extract from that the distro and the major version number + os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } } + + return("https://cloud.r-project.org") } - - return("https://cloud.r-project.org") -} - -options( - Ncpus = parallel::detectCores(), - repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"), - HTTPUserAgent = sprintf( - 'R/%s R (%s)', - getRversion(), - paste(getRversion(), R.version$platform, R.version$arch, R.version$os) + + options( + Ncpus = parallel::detectCores(), + repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"), + HTTPUserAgent = sprintf( + 'R/%s R (%s)', + getRversion(), + paste(getRversion(), R.version$platform, R.version$arch, R.version$os) + ) ) -) +}) diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp new file mode 100644 index 00000000000..4d292202608 --- /dev/null +++ b/ci/etc/valgrind-cran.supp @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{ + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + + Memcheck:Cond + fun:gregexpr_Regexc + fun:do_regexpr + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:Rf_applyClosure + fun:bcEval + fun:Rf_eval + fun:forcePromise + fun:FORCE_PROMISE + fun:getvar + fun:bcEval +} diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index c5b55eef42a..56d70d83daf 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=3.0.0.9000 +pkgver=5.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 8a1e4f32f3a..46845d0e623 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -41,6 +41,11 @@ if [ "${ARROW_USE_CCACHE}" == "ON" ]; then ccache -s fi +if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! 
-x "${ASAN_SYMBOLIZER_PATH}" ]; then + echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}" + exit 1 +fi + mkdir -p ${build_dir} pushd ${build_dir} @@ -59,6 +64,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \ -DARROW_CUDA=${ARROW_CUDA:-OFF} \ -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ -DARROW_DATASET=${ARROW_DATASET:-ON} \ + -DARROW_ENGINE=${ARROW_ENGINE:-ON} \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ diff --git a/ci/scripts/docs_build.sh b/ci/scripts/docs_build.sh index a0d926a335e..e6ee768ee87 100755 --- a/ci/scripts/docs_build.sh +++ b/ci/scripts/docs_build.sh @@ -27,8 +27,10 @@ export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 export CFLAGS="-DARROW_NO_DEPRECATED_API" export CXXFLAGS="-DARROW_NO_DEPRECATED_API" -# Prose and Python -sphinx-build -b html ${arrow_dir}/docs/source ${build_dir} +ncpus=$(python3 -c "import os; print(os.cpu_count())") + +# Sphinx docs +sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir} # C++ - original doxygen # rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh new file mode 100755 index 00000000000..088da817676 --- /dev/null +++ b/ci/scripts/install_python.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eu + +declare -A platforms +platforms=([windows]=Windows + [macos]=MacOSX + [linux]=Linux) + +declare -A versions +versions=([3.6]=3.6.8 + [3.7]=3.7.9 + [3.8]=3.8.10 + [3.9]=3.9.6) + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +elif [[ -z ${platforms[$1]} ]]; then + echo "Unexpected platform: ${1}" + exit 1 +fi + +platform=${platforms[$1]} +version=$2 +full_version=${versions[$2]} + +if [ $platform = "MacOSX" ]; then + echo "Downloading Python installer..." + if [ "$(uname -m)" = "arm64" ]; then + fname="python-${full_version}-macos11.pkg" + else + fname="python-${full_version}-macosx10.9.pkg" + fi + wget "https://www.python.org/ftp/python/${full_version}/${fname}" + + echo "Installing Python..." + installer -pkg $fname -target / + rm $fname + + echo "Installing Pip..." 
+ python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" + pip="${python} -m pip" + + $python -m ensurepip + $pip install -U pip setuptools virtualenv +else + echo "Unsupported platform: $platform" +fi diff --git a/ci/scripts/install_turbodbc.sh b/ci/scripts/install_turbodbc.sh index a71520bebf4..3e644a3e27a 100755 --- a/ci/scripts/install_turbodbc.sh +++ b/ci/scripts/install_turbodbc.sh @@ -35,3 +35,9 @@ elif [ "${turbodbc}" = "latest" ]; then else git -C "${target}" checkout ${turbodbc}; fi + +pushd ${target} +wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz +tar xvf v2.6.2.tar.gz +mv pybind11-2.6.2 pybind11 +popd diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh new file mode 100755 index 00000000000..fe99a7fea2f --- /dev/null +++ b/ci/scripts/install_vcpkg.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +vcpkg_version=$1 +vcpkg_destination=$2 +vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch) + +git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination} + +pushd ${vcpkg_destination} + +./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics +git apply --ignore-whitespace ${vcpkg_patch} +echo "Patch successfully applied!" + +popd diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index 5d2e71916ed..453561d3171 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -20,14 +20,17 @@ set -ex arrow_dir=${1} -source_dir=${1}/cpp -build_dir=${2}/cpp - gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration pip install -e $arrow_dir/dev/archery -archery integration --with-all --run-flight \ +# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1 +archery integration \ + --run-flight \ + --with-cpp=1 \ + --with-java=1 \ + --with-js=1 \ + --with-go=1 \ --gold-dirs=$gold_dir/0.14.1 \ --gold-dirs=$gold_dir/0.17.1 \ --gold-dirs=$gold_dir/1.0.0-bigendian \ diff --git a/ci/scripts/integration_turbodbc.sh b/ci/scripts/integration_turbodbc.sh index f56074358a6..f0fafd51228 100755 --- a/ci/scripts/integration_turbodbc.sh +++ b/ci/scripts/integration_turbodbc.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. 
-set -e +set -ex source_dir=${1} build_dir=${2}/turbodbc @@ -31,7 +31,7 @@ mkdir -p ${build_dir} pushd ${build_dir} cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ - -DCMAKE_CXX_FLAGS=${CXXFLAGS} \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \ -DPYTHON_EXECUTABLE=$(which python) \ -GNinja \ ${source_dir} @@ -39,7 +39,7 @@ ninja install # TODO(ARROW-5074) export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}" -export ODBCSYSINI="${source_dir}/travis/odbc/" +export ODBCSYSINI="${source_dir}/earthly/odbc/" service postgresql start ctest --output-on-failure diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 54cddb50372..5ef150fd1e7 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -25,59 +25,50 @@ with_docs=${3:-false} if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then # Since some files for s390_64 are not available at maven central, - # download pre-build files from bintray and install them explicitly + # download pre-build files from Artifactory and install them explicitly mvn_install="mvn install:install-file" wget="wget" - bintray_base_url="https://dl.bintray.com/apache/arrow" + artifactory_base_url="https://apache.jfrog.io/artifactory/arrow" - bintray_dir="flatc-binary" - group="com.github.icexelloss" - artifact="flatc-linux-s390_64" - ver="1.9.0" - extension="exe" - target=${artifact}-${ver}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} - ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dpackaging=${extension} -Dfile=$(pwd)/${target} - - bintray_dir="protoc-binary" + artifactory_dir="protoc-binary" group="com.google.protobuf" artifact="protoc" ver="3.7.1" classifier="linux-s390_64" extension="exe" target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target} ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} 
-Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} # protoc requires libprotoc.so.18 libprotobuf.so.18 - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotoc.so.18 - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotobuf.so.18 + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18 + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18 mkdir -p ${ARROW_HOME}/lib cp lib*.so.18 ${ARROW_HOME}/lib export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${ARROW_HOME}/lib - bintray_dir="protoc-gen-grpc-java-binary" + artifactory_dir="protoc-gen-grpc-java-binary" group="io.grpc" artifact="protoc-gen-grpc-java" ver="1.30.2" classifier="linux-s390_64" extension="exe" target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target} ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} - bintray_dir="netty-binary" + artifactory_dir="netty-binary" group="io.netty" artifact="netty-transport-native-unix-common" ver="4.1.48.Final" classifier="linux-s390_64" extension="jar" target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target} ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target} artifact="netty-transport-native-epoll" extension="jar" target=${artifact}-${ver}-${classifier}.${extension} - ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target} + ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target} ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} 
-Dfile=$(pwd)/${target} fi @@ -104,7 +95,8 @@ if [ "${ARROW_PLASMA}" = "ON" ]; then fi if [ "${with_docs}" == "true" ]; then - ${mvn} -Dcheckstyle.skip=true install site + # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633 + ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site fi popd diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh new file mode 100755 index 00000000000..b4ae48f3d9a --- /dev/null +++ b/ci/scripts/java_jni_build.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +arrow_dir=${1} +cpp_lib_dir=${2} +java_dist_dir=${3} + +export ARROW_TEST_DATA=${arrow_dir}/testing/data + +pushd ${arrow_dir}/java + +# build the entire project +mvn clean install -P arrow-jni -Darrow.cpp.build.dir=$cpp_lib_dir + +# copy all jars and pom files to the distribution folder +find . -name "*.jar" -exec echo {} \; -exec cp {} $java_dist_dir \; +find . 
-name "*.pom" -exec echo {} \; -exec cp {} $java_dist_dir \; + +popd diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh new file mode 100755 index 00000000000..5c11ee97584 --- /dev/null +++ b/ci/scripts/java_jni_macos_build.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +arrow_dir=${1} +build_dir=${2} +# The directory where the final binaries will be stored when scripts finish +dist_dir=${3} + +echo "=== Clear output directories and leftovers ===" +# Clear output directories and leftovers +rm -rf ${build_dir} + +echo "=== Building Arrow C++ libraries ===" +: ${ARROW_BUILD_TESTS:=ON} +: ${ARROW_DATASET:=ON} +: ${ARROW_FILESYSTEM:=ON} +: ${ARROW_GANDIVA_JAVA:=ON} +: ${ARROW_GANDIVA:=ON} +: ${ARROW_ORC:=ON} +: ${ARROW_PARQUET:=ON} +: ${ARROW_PLASMA_JAVA_CLIENT:=ON} +: ${ARROW_PLASMA:=ON} +: ${ARROW_PYTHON:=OFF} +: ${CMAKE_BUILD_TYPE:=Release} +: ${CMAKE_UNITY_BUILD:=ON} + +export ARROW_TEST_DATA="${arrow_dir}/testing/data" +export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" +export AWS_EC2_METADATA_DISABLED=TRUE + +mkdir -p "${build_dir}" +pushd "${build_dir}" + +cmake \ + -DARROW_BOOST_USE_SHARED=OFF \ + -DARROW_BROTLI_USE_SHARED=OFF \ + -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ + -DARROW_BUILD_UTILITIES=OFF \ + -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ + -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ + -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \ + -DARROW_GANDIVA=${ARROW_GANDIVA} \ + -DARROW_GFLAGS_USE_SHARED=OFF \ + -DARROW_GRPC_USE_SHARED=OFF \ + -DARROW_JNI=ON \ + -DARROW_LZ4_USE_SHARED=OFF \ + -DARROW_OPENSSL_USE_SHARED=OFF \ + -DARROW_ORC=${ARROW_ORC} \ + -DARROW_PARQUET=${ARROW_PARQUET} \ + -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ + -DARROW_PLASMA=${ARROW_PLASMA} \ + -DARROW_PROTOBUF_USE_SHARED=OFF \ + -DARROW_PYTHON=${ARROW_PYTHON} \ + -DARROW_SNAPPY_USE_SHARED=OFF \ + -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_UTF8PROC_USE_SHARED=OFF \ + -DARROW_ZSTD_USE_SHARED=OFF \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=${build_dir} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DPARQUET_BUILD_EXAMPLES=OFF \ + -DPARQUET_BUILD_EXECUTABLES=OFF \ + 
-DPARQUET_REQUIRE_ENCRYPTION=OFF \ + ${arrow_dir}/cpp +cmake --build . --target install + +if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then + ctest +fi + +popd + +echo "=== Copying libraries to the distribution folder ===" +mkdir -p "${dist_dir}" +cp -L ${build_dir}/lib/libgandiva_jni.dylib ${dist_dir} +cp -L ${build_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir} +cp -L ${build_dir}/lib/libarrow_orc_jni.dylib ${dist_dir} + +echo "=== Checking shared dependencies for libraries ===" + +pushd ${dist_dir} +archery linking check-dependencies \ + --allow libarrow_dataset_jni \ + --allow libarrow_orc_jni \ + --allow libc++ \ + --allow libgandiva_jni \ + --allow libncurses \ + --allow libSystem \ + --allow libz \ + libgandiva_jni.dylib \ + libarrow_dataset_jni.dylib \ + libarrow_orc_jni.dylib +popd diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh new file mode 100755 index 00000000000..4d01c1c30d1 --- /dev/null +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +arrow_dir=${1} +build_dir=${2} +# The directory where the final binaries will be stored when scripts finish +dist_dir=${3} + +echo "=== Clear output directories and leftovers ===" +# Clear output directories and leftovers +rm -rf ${build_dir} + +echo "=== Building Arrow C++ libraries ===" +: ${ARROW_DATASET:=ON} +: ${ARROW_GANDIVA:=ON} +: ${ARROW_GANDIVA_JAVA:=ON} +: ${ARROW_FILESYSTEM:=ON} +: ${ARROW_JEMALLOC:=ON} +: ${ARROW_RPATH_ORIGIN:=ON} +: ${ARROW_ORC:=ON} +: ${ARROW_PARQUET:=ON} +: ${ARROW_PLASMA:=ON} +: ${ARROW_PLASMA_JAVA_CLIENT:=ON} +: ${ARROW_PYTHON:=OFF} +: ${ARROW_BUILD_TESTS:=ON} +: ${CMAKE_BUILD_TYPE:=Release} +: ${CMAKE_UNITY_BUILD:=ON} +: ${VCPKG_FEATURE_FLAGS:=-manifests} +: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} +: ${GANDIVA_CXX_FLAGS:=-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9;-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9/x86_64-redhat-linux;-isystem;-lpthread} + +export ARROW_TEST_DATA="${arrow_dir}/testing/data" +export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" +export AWS_EC2_METADATA_DISABLED=TRUE + +mkdir -p "${build_dir}" +pushd "${build_dir}" + +cmake \ + -DARROW_BOOST_USE_SHARED=OFF \ + -DARROW_BROTLI_USE_SHARED=OFF \ + -DARROW_BUILD_SHARED=ON \ + -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ + -DARROW_BUILD_UTILITIES=OFF \ + -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_DEPENDENCY_SOURCE="VCPKG" \ + -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ + -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ + -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \ + -DARROW_GANDIVA=${ARROW_GANDIVA} \ + -DARROW_GRPC_USE_SHARED=OFF \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ + -DARROW_JNI=ON \ + -DARROW_LZ4_USE_SHARED=OFF \ + -DARROW_OPENSSL_USE_SHARED=OFF \ + -DARROW_ORC=${ARROW_ORC} \ + -DARROW_PARQUET=${ARROW_PARQUET} \ + -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ + -DARROW_PLASMA=${ARROW_PLASMA} \ + 
-DARROW_PROTOBUF_USE_SHARED=OFF \ + -DARROW_PYTHON=${ARROW_PYTHON} \ + -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \ + -DARROW_SNAPPY_USE_SHARED=OFF \ + -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_UTF8PROC_USE_SHARED=OFF \ + -DARROW_ZSTD_USE_SHARED=OFF \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=${build_dir} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DPARQUET_BUILD_EXAMPLES=OFF \ + -DPARQUET_BUILD_EXECUTABLES=OFF \ + -DPARQUET_REQUIRE_ENCRYPTION=OFF \ + -DPythonInterp_FIND_VERSION_MAJOR=3 \ + -DPythonInterp_FIND_VERSION=ON \ + -DVCPKG_MANIFEST_MODE=OFF \ + -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ + -GNinja \ + ${arrow_dir}/cpp +ninja install + +if [ $ARROW_BUILD_TESTS = "ON" ]; then + CTEST_OUTPUT_ON_FAILURE=1 ninja test +fi + +popd + +echo "=== Copying libraries to the distribution folder ===" +mkdir -p "${dist_dir}" +cp -L ${build_dir}/lib/libgandiva_jni.so ${dist_dir} +cp -L ${build_dir}/lib/libarrow_dataset_jni.so ${dist_dir} +cp -L ${build_dir}/lib/libarrow_orc_jni.so ${dist_dir} + +echo "=== Checking shared dependencies for libraries ===" + +pushd ${dist_dir} +archery linking check-dependencies \ + --allow ld-linux-x86-64 \ + --allow libc \ + --allow libdl \ + --allow libgcc_s \ + --allow libm \ + --allow libpthread \ + --allow librt \ + --allow libstdc++ \ + --allow libz \ + --allow linux-vdso \ + libgandiva_jni.so \ + libarrow_dataset_jni.so \ + libarrow_orc_jni.so +popd diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index cb6ca30a64e..991d98bbb4a 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -35,6 +35,7 @@ case "${target}" in packages+=(${MINGW_PACKAGE_PREFIX}-grpc) packages+=(${MINGW_PACKAGE_PREFIX}-gtest) packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc) + packages+=(${MINGW_PACKAGE_PREFIX}-libxml2) packages+=(${MINGW_PACKAGE_PREFIX}-llvm) packages+=(${MINGW_PACKAGE_PREFIX}-lz4) packages+=(${MINGW_PACKAGE_PREFIX}-ninja) diff --git 
a/ci/scripts/msys2_system_upgrade.sh b/ci/scripts/msys2_system_upgrade.sh index aecd3089332..646428fbb7a 100755 --- a/ci/scripts/msys2_system_upgrade.sh +++ b/ci/scripts/msys2_system_upgrade.sh @@ -19,26 +19,6 @@ set -eux -# https://www.msys2.org/news/#2020-06-29-new-packagers -msys2_repo_base_url=https://repo.msys2.org/msys -# Mirror -msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2 -msys2_keyring_pkg=msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz -for suffix in "" ".sig"; do - curl \ - --location \ - --remote-name \ - --show-error \ - --silent \ - ${msys2_repo_base_url}/x86_64/${msys2_keyring_pkg}${suffix} -done -pacman-key --verify ${msys2_keyring_pkg}.sig -pacman \ - --noconfirm \ - --upgrade \ - ${msys2_keyring_pkg} - - pacman \ --noconfirm \ --refresh \ diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 1388ca09e43..3dd7d7ddd5b 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -42,10 +42,16 @@ export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} # unset ARROW_HOME # apt purge -y pkg-config +# ARROW-12619 +if command -v git &> /dev/null; then + echo "Git exists, remove it from PATH before executing this script." 
+ exit 1 +fi + if [ -n "${PYARROW_VERSION:-}" ]; then sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz" else - sdist=$(ls "${arrow_dir}/python/dist/pyarrow-*.tar.gz" | sort -r | head -n1) + sdist=$(ls ${arrow_dir}/python/dist/pyarrow-*.tar.gz | sort -r | head -n1) fi ${PYTHON:-python} -m pip install ${sdist} diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 80a9cdef4a3..6e05af89a19 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -29,4 +29,4 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} # Enable some checks inside Python itself export PYTHONDEVMODE=1 -pytest -r s ${PYTEST_ARGS} --pyargs pyarrow +pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 7a021f70f74..82e0339c9d0 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -19,8 +19,9 @@ set -ex -source_dir=${1} -build_dir=${2} +arch=${1} +source_dir=${2} +build_dir=${3} echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ===" # Clear output directories and leftovers @@ -31,11 +32,32 @@ rm -rf ${source_dir}/python/repaired_wheels rm -rf ${source_dir}/python/pyarrow/*.so rm -rf ${source_dir}/python/pyarrow/*.so.* -echo "=== (${PYTHON_VERSION}) Set OSX SDK and C flags ===" -# Arrow is 64-bit-only at the moment -export CFLAGS="-fPIC -arch x86_64 ${CFLAGS//-arch i386/}" -export CXXFLAGS="-fPIC -arch x86_64 ${CXXFLAGS//-arch i386} -std=c++11" -export SDKROOT="$(xcrun --show-sdk-path)" +echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ===" +export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}" +export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9} +export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)} + +if [ $arch = "arm64" ]; then + export CMAKE_OSX_ARCHITECTURES="arm64" +elif [ $arch = "x86_64" ]; then + export 
CMAKE_OSX_ARCHITECTURES="x86_64" +elif [ $arch = "universal2" ]; then + export CMAKE_OSX_ARCHITECTURES="x86_64;arm64" +else + echo "Unexpected architecture: $arch" + exit 1 +fi + +echo "=== (${PYTHON_VERSION}) Install Python build dependencies ===" +export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])') +export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}" + +pip install \ + --only-binary=:all: \ + --target $PIP_SITE_PACKAGES \ + --platform $PIP_TARGET_PLATFORM \ + -r ${source_dir}/python/requirements-wheel-build.txt +pip install "delocate>=0.9" echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_DATASET:=ON} @@ -48,6 +70,7 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_PARQUET:=ON} : ${ARROW_PLASMA:=ON} : ${ARROW_S3:=ON} +: ${ARROW_SIMD_LEVEL:="SSE4_2"} : ${ARROW_TENSORFLOW:=ON} : ${ARROW_WITH_BROTLI:=ON} : ${ARROW_WITH_BZ2:=ON} @@ -57,30 +80,35 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_WITH_ZSTD:=ON} : ${CMAKE_BUILD_TYPE:=release} : ${CMAKE_GENERATOR:=Ninja} +: ${CMAKE_UNITY_BUILD:=ON} : ${VCPKG_FEATURE_FLAGS:=-manifests} : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}} mkdir -p ${build_dir}/build pushd ${build_dir}/build + cmake \ -DARROW_BUILD_SHARED=ON \ + -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \ + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_DATASET=${ARROW_DATASET} \ -DARROW_DEPENDENCY_SOURCE="VCPKG" \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ - -DARROW_FLIGHT==${ARROW_FLIGHT} \ + -DARROW_FLIGHT=${ARROW_FLIGHT} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_HDFS=${ARROW_HDFS} \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ -DARROW_ORC=${ARROW_ORC} \ - -DARROW_PACKAGE_KIND="manylinux${MANYLINUX_VERSION}" \ + -DARROW_PACKAGE_KIND="python-wheel-macos" \ -DARROW_PARQUET=${ARROW_PARQUET} \ 
-DARROW_PLASMA=${ARROW_PLASMA} \ -DARROW_PYTHON=ON \ -DARROW_RPATH_ORIGIN=ON \ -DARROW_S3=${ARROW_S3} \ + -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ -DARROW_USE_CCACHE=ON \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ @@ -92,7 +120,7 @@ cmake \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${build_dir}/install \ - -DCMAKE_UNITY_BUILD=ON \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DOPENSSL_USE_STATIC_LIBS=ON \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ @@ -101,9 +129,6 @@ cmake \ cmake --build . --target install popd -# Check that we don't expose any unwanted symbols -# check_arrow_visibility - echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=1 @@ -117,8 +142,11 @@ export PYARROW_WITH_ORC=${ARROW_ORC} export PYARROW_WITH_PARQUET=${ARROW_PARQUET} export PYARROW_WITH_PLASMA=${ARROW_PLASMA} export PYARROW_WITH_S3=${ARROW_S3} +export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" # PyArrow build configuration export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig +# Set PyArrow version explicitly +export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python setup.py bdist_wheel @@ -127,7 +155,11 @@ popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl) -if echo $deps | grep -v "^@rpath/lib\(arrow\|gandiva\|parquet\|plasma\)"; then +if echo $deps | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then echo "There are non-bundled shared library dependencies." 
exit 1 fi + +# Move the verified wheels +mkdir -p ${source_dir}/python/repaired_wheels +mv ${source_dir}/python/dist/*.whl ${source_dir}/python/repaired_wheels/ diff --git a/ci/scripts/python_wheel_macos_test.sh b/ci/scripts/python_wheel_macos_test.sh deleted file mode 100755 index 6ac8576d484..00000000000 --- a/ci/scripts/python_wheel_macos_test.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1} - -: ${ARROW_S3:=ON} - -export PYARROW_TEST_CYTHON=OFF -export PYARROW_TEST_DATASET=ON -export PYARROW_TEST_GANDIVA=OFF -export PYARROW_TEST_HDFS=ON -export PYARROW_TEST_ORC=ON -export PYARROW_TEST_PANDAS=ON -export PYARROW_TEST_PARQUET=ON -export PYARROW_TEST_PLASMA=ON -export PYARROW_TEST_S3=${ARROW_S3} -export PYARROW_TEST_TENSORFLOW=ON -export PYARROW_TEST_FLIGHT=ON - -export ARROW_TEST_DATA=${source_dir}/testing/data -export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data - -# Install the built wheels -pip install ${source_dir}/python/dist/*.whl - -# Test that the modules are importable -python -c " -import pyarrow -import pyarrow._hdfs -import pyarrow.csv -import pyarrow.dataset -import pyarrow.flight -import pyarrow.fs -import pyarrow.json -import pyarrow.orc -import pyarrow.parquet -import pyarrow.plasma -" - -if [ "${PYARROW_TEST_S3}" == "ON" ]; then - python -c "import pyarrow._s3fs" -fi - -# Install testing dependencies -pip install -r ${source_dir}/python/requirements-wheel-test.txt - -# Execute unittest -pytest -r s --pyargs pyarrow diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index 83aa623b49b..312e1c3b9b7 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -87,7 +87,7 @@ cmake \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ -DARROW_ORC=${ARROW_ORC} \ - -DARROW_PACKAGE_KIND="manylinux${MANYLINUX_VERSION}" \ + -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA=${ARROW_PLASMA} \ -DARROW_PYTHON=ON \ diff --git a/ci/scripts/python_wheel_manylinux_test.sh b/ci/scripts/python_wheel_manylinux_test.sh deleted file mode 100755 index 21987748f73..00000000000 --- a/ci/scripts/python_wheel_manylinux_test.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) 
under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -x -set -o pipefail - -case $# in - 1) KIND="$1" - case $KIND in - imports|unittests) ;; - *) echo "Invalid argument: '${KIND}', valid options are 'imports', 'unittests'" - exit 1 - ;; - esac - ;; - *) echo "Usage: $0 imports|unittests" - exit 1 - ;; -esac - -export PYARROW_TEST_CYTHON=OFF -export PYARROW_TEST_DATASET=ON -export PYARROW_TEST_GANDIVA=OFF -export PYARROW_TEST_HDFS=ON -export PYARROW_TEST_ORC=ON -export PYARROW_TEST_PANDAS=ON -export PYARROW_TEST_PARQUET=ON -export PYARROW_TEST_PLASMA=ON -export PYARROW_TEST_S3=ON -export PYARROW_TEST_TENSORFLOW=ON -export PYARROW_TEST_FLIGHT=ON - -export ARROW_TEST_DATA=/arrow/testing/data -export PARQUET_TEST_DATA=/arrow/submodules/parquet-testing/data - -# Install the built wheels -pip install /arrow/python/repaired_wheels/*.whl - -if [ "${KIND}" == "imports" ]; then - # Test that the modules are importable - python -c " -import pyarrow -import pyarrow._hdfs -import pyarrow._s3fs -import pyarrow.csv -import pyarrow.dataset -import pyarrow.flight -import pyarrow.fs -import pyarrow.json -import pyarrow.orc -import pyarrow.parquet -import pyarrow.plasma" -elif [ "${KIND}" == "unittests" ]; then - # Execute unittest, test dependencies must be installed - pytest -r s --pyargs 
pyarrow -fi diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh new file mode 100755 index 00000000000..50d3ccb4f8e --- /dev/null +++ b/ci/scripts/python_wheel_unix_test.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e +set -x +set -o pipefail + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +source_dir=${1} + +: ${ARROW_FLIGHT:=ON} +: ${ARROW_S3:=ON} +: ${CHECK_IMPORTS:=ON} +: ${CHECK_UNITTESTS:=ON} +: ${INSTALL_PYARROW:=ON} + +export PYARROW_TEST_CYTHON=OFF +export PYARROW_TEST_DATASET=ON +export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT} +export PYARROW_TEST_GANDIVA=OFF +export PYARROW_TEST_HDFS=ON +export PYARROW_TEST_ORC=ON +export PYARROW_TEST_PANDAS=ON +export PYARROW_TEST_PARQUET=ON +export PYARROW_TEST_PLASMA=ON +export PYARROW_TEST_S3=${ARROW_S3} +export PYARROW_TEST_TENSORFLOW=ON + +export ARROW_TEST_DATA=${source_dir}/testing/data +export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data + +if [ "${INSTALL_PYARROW}" == "ON" ]; then + # Install the built wheels + pip install ${source_dir}/python/repaired_wheels/*.whl +fi + +if [ "${CHECK_IMPORTS}" == "ON" ]; then + # Test that the modules are importable + python -c " +import pyarrow +import pyarrow._hdfs +import pyarrow.csv +import pyarrow.dataset +import pyarrow.fs +import pyarrow.json +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +" + if [ "${PYARROW_TEST_S3}" == "ON" ]; then + python -c "import pyarrow._s3fs" + fi + if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then + python -c "import pyarrow.flight" + fi +fi + +if [ "${CHECK_UNITTESTS}" == "ON" ]; then + # Install testing dependencies + pip install -U -r ${source_dir}/python/requirements-wheel-test.txt + # Execute unittest, test dependencies must be installed + python -c 'import pyarrow; pyarrow.create_library_symlinks()' + pytest -r s --pyargs pyarrow +fi diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 18c1b657b21..23be7f512d6 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -64,7 +64,7 @@ cmake ^ -DARROW_HDFS=%ARROW_HDFS% ^ -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^ -DARROW_ORC=%ARROW_ORC% ^ - 
-DARROW_PACKAGE_KIND="wheel-windows" ^ + -DARROW_PACKAGE_KIND="python-wheel-windows" ^ -DARROW_PARQUET=%ARROW_PARQUET% ^ -DARROW_PYTHON=ON ^ -DARROW_S3=%ARROW_S3% ^ diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index 8352e586226..586fd58f651 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -19,6 +19,7 @@ set PYARROW_TEST_CYTHON=OFF set PYARROW_TEST_DATASET=ON +set PYARROW_TEST_FLIGHT=ON set PYARROW_TEST_GANDIVA=OFF set PYARROW_TEST_HDFS=ON set PYARROW_TEST_ORC=OFF @@ -27,7 +28,6 @@ set PYARROW_TEST_PARQUET=ON set PYARROW_TEST_PLASMA=OFF set PYARROW_TEST_S3=OFF set PYARROW_TEST_TENSORFLOW=ON -set PYARROW_TEST_FLIGHT=ON set ARROW_TEST_DATA=C:\arrow\testing\data set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh index 7e9d2eac7a9..243a7efc9cf 100755 --- a/ci/scripts/r_deps.sh +++ b/ci/scripts/r_deps.sh @@ -26,6 +26,15 @@ pushd ${source_dir} # Install R package dependencies ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))" + +if [ ${R_BIN} = "RDsan" ]; then + # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements) + ${R_BIN} -e 'd <- read.dcf("DESCRIPTION") + to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate") + pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|") + d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"]) + write.dcf(d, "DESCRIPTION")' +fi ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)" popd diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 3e553fe9edd..2b9bc03bea0 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -37,6 +37,9 @@ if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then dnf install -y libcxx-devel sed -i.bak -E -e 
's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak + + sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' $(${R_BIN} RHOME)/etc/Makeconf + rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak fi # Special hacking to try to reproduce quirks on centos using non-default build diff --git a/ci/scripts/r_revdepcheck.sh b/ci/scripts/r_revdepcheck.sh new file mode 100755 index 00000000000..79ace9ca09d --- /dev/null +++ b/ci/scripts/r_revdepcheck.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +# cpp building dependencies +apt install -y cmake + +# system dependencies needed for arrow's reverse dependencies +apt install -y libxml2-dev \ + libfontconfig1-dev \ + libcairo2-dev \ + libglpk-dev \ + libmysqlclient-dev \ + unixodbc-dev \ + libpq-dev \ + coinor-libsymphony-dev \ + coinor-libcgl-dev \ + coinor-symphony \ + libzmq3-dev \ + libudunits2-dev \ + libgdal-dev \ + libgeos-dev \ + libproj-dev + +pushd ${source_dir} + +printenv + +: ${TEST_R_WITH_ARROW:=TRUE} +export TEST_R_WITH_ARROW=$TEST_R_WITH_ARROW + +# By default, aws-sdk tries to contact a non-existing local ip host +# to retrieve metadata. Disable this so that S3FileSystem tests run faster. +export AWS_EC2_METADATA_DISABLED=TRUE + +# Set crancache dir so we can cache it +export CRANCACHE_DIR="/arrow/.crancache" + +SCRIPT=" + # We can't use RSPM binaries because we need source packages + options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest')) + remotes::install_github('r-lib/revdepcheck') + + # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck + install.packages('zoo') + + # actually run revdepcheck + revdepcheck::revdep_check( + quiet = FALSE, + timeout = as.difftime(120, units = 'mins'), + num_workers = 1, + env = c( + ARROW_R_DEV = '$ARROW_R_DEV', + LIBARROW_DOWNLOAD = TRUE, + LIBARROW_MINIMAL = FALSE, + revdepcheck::revdep_env_vars() + )) + revdepcheck::revdep_report(all = TRUE) + + # Go through the summary and fail if any of the statuses include - + summary <- revdepcheck::revdep_summary() + failed <- lapply(summary, function(check) grepl('-', check[['status']])) + + if (any(unlist(failed))) { + quit(status = 1) + } + " + +echo "$SCRIPT" | ${R_BIN} --no-save + +popd diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index 89963eb2dd8..61d0755878f 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -22,12 +22,23 @@ set -ex source_dir=${1}/r 
-${R_BIN} CMD INSTALL ${source_dir} -pushd ${source_dir}/tests +pushd ${source_dir} + +# Unity builds were causing the CI job to run out of memory +export CMAKE_UNITY_BUILD=OFF +# Make installation verbose so that the CI job doesn't time out due to silence +export ARROW_R_DEV=TRUE +${R_BIN} CMD INSTALL . +# But unset the env var so that it doesn't cause us to run extra dev tests +unset ARROW_R_DEV export TEST_R_WITH_ARROW=TRUE export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" + +pushd tests ${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +popd +${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> testthat.out 2>&1 || { cat testthat.out; exit 1; } cat testthat.out if grep -q "runtime error" testthat.out; then diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh new file mode 100755 index 00000000000..ae61d076655 --- /dev/null +++ b/ci/scripts/r_valgrind.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +: ${R_BIN:=RDvalgrind} + +source_dir=${1}/r + +export CMAKE_BUILD_TYPE=RelWithDebInfo + +${R_BIN} CMD INSTALL ${source_dir} +pushd ${source_dir}/tests + +export TEST_R_WITH_ARROW=TRUE + +# to generate suppression files run: +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp +${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out + +# valgrind --error-exitcode=1 should return an erroring exit code that we can catch, +# but R eats that and returns 0, so we need to look at the output and make sure that +# we have 0 errors instead. +if [ $(grep -c "ERROR SUMMARY: 0 errors" testthat.out) != 1 ]; then + cat testthat.out + echo "Found Valgrind errors" + exit 1 +fi + +# We might also considering using the greps that LibthGBM uses: +# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85 + +popd diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh index be03b75f5ad..20f824a9e01 100755 --- a/ci/scripts/r_windows_build.sh +++ b/ci/scripts/r_windows_build.sh @@ -26,16 +26,6 @@ export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)" if [ "$RTOOLS_VERSION" = "35" ]; then # Use rtools-backports if building with rtools35 curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf - # Update keys: https://www.msys2.org/news/#2020-06-29-new-packagers - msys2_repo_base_url=https://repo.msys2.org/msys - # Mirror - msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2 - curl -OSsL "${msys2_repo_base_url}/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz" - pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz - # Use sf.net instead of 
http://repo.msys2.org/ temporary. - sed -i -e "s,^Server = http://repo\.msys2\.org/msys,Server = ${msys2_repo_base_url},g" \ - /etc/pacman.conf - pacman --noconfirm -Scc pacman --noconfirm -Syy # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5) RWINLIB_LIB_DIR="lib-4.9.3" @@ -48,6 +38,7 @@ else pacman --noconfirm -Syy RWINLIB_LIB_DIR="lib" + export MINGW_ARCH="mingw32 mingw64 ucrt64" fi cp $ARROW_HOME/ci/scripts/PKGBUILD . @@ -69,7 +60,7 @@ MSYS_LIB_DIR="/c/rtools40" ls $MSYS_LIB_DIR/mingw64/lib/ ls $MSYS_LIB_DIR/mingw32/lib/ -# Untar the two builds we made +# Untar the three builds we made ls *.xz | xargs -n 1 tar -xJf mkdir -p $DST_DIR # Grab the headers from one, either one is fine @@ -99,6 +90,14 @@ cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i3 cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64 cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386 +# Do the same also for ucrt64 +if [ "$RTOOLS_VERSION" != "35" ]; then +ls $MSYS_LIB_DIR/ucrt64/lib/ +mkdir -p $DST_DIR/lib/x64-ucrt +mv ucrt64/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/x64-ucrt +cp $MSYS_LIB_DIR/ucrt64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64-ucrt +fi + # Create build artifact zip -r ${DST_DIR}.zip $DST_DIR diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh index 726ecd80f1a..3532ea3d5c6 100755 --- a/ci/scripts/rust_build.sh +++ b/ci/scripts/rust_build.sh @@ -17,13 +17,14 @@ # specific language governing permissions and limitations # under the License. -set -ex +set -e +arrow_dir=${1} source_dir=${1}/rust -# This file is used to build the rust binaries needed for the -# archery integration tests. Testing of the rust implementation -# in normal CI is handled by github workflows +# This file is used to build the rust binaries needed for the archery +# integration tests. 
Testing of the rust implementation in normal CI is handled +# by github workflows in the arrow-rs repository. # Disable full debug symbol generation to speed up CI build / reduce memory required export RUSTFLAGS="-C debuginfo=1" @@ -31,6 +32,22 @@ export RUSTFLAGS="-C debuginfo=1" export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data +if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then + echo "=====================================================================" + echo "Not building the Rust implementation." + echo "=====================================================================" + exit 0; +elif [ ! -d "${source_dir}" ]; then + echo "=====================================================================" + echo "The Rust source is missing. Please clone the arrow-rs repository" + echo "to arrow/rust before running the integration tests:" + echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust" + echo "=====================================================================" + exit 1; +fi + +set -x + # show activated toolchain rustup show @@ -39,7 +56,4 @@ pushd ${source_dir} # build only the integration testing binaries cargo build -p arrow-integration-testing -# Remove incremental build artifacts to save space -rm -rf target/debug/deps/ target/debug/build/ - popd diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake new file mode 100644 index 00000000000..f511819a2ed --- /dev/null +++ b/ci/vcpkg/arm64-osx-static-debug.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES arm64) +set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") + +set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake new file mode 100644 index 00000000000..43d65efb265 --- /dev/null +++ b/ci/vcpkg/arm64-osx-static-release.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES arm64) +set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") + +set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 14b9678690e..7bcba49c194 100644 --- a/ci/vcpkg/ports.patch +++ b/ci/vcpkg/ports.patch @@ -1,5 +1,5 @@ diff --git a/ports/aws-c-common/portfile.cmake b/ports/aws-c-common/portfile.cmake -index f3704ef05..3af543058 100644 +index f3704ef05b..3af543058d 100644 --- a/ports/aws-c-common/portfile.cmake +++ b/ports/aws-c-common/portfile.cmake @@ -1,8 +1,8 @@ @@ -12,22 +12,22 @@ index f3704ef05..3af543058 100644 + SHA512 28256522ac6af544d7464e3e7dcd4dc802ae2b09728bf8f167f86a6487bb756d0cad5eb4a2480610b2967b9c24c4a7f70621894517aa2828ffdeb0479453803b HEAD_REF master PATCHES - disable-error-4068.patch # This patch fixes dependency port compilation failure + disable-error-4068.patch # This patch fixes dependency port compilation failure diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake -index 6e18aecd0..2ccecf33c 100644 +index be66d452be..a5ce325e9d 100644 --- a/ports/curl/portfile.cmake +++ b/ports/curl/portfile.cmake -@@ -76,6 +76,8 @@ vcpkg_configure_cmake( +@@ -94,6 +94,8 @@ vcpkg_configure_cmake( -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON -DENABLE_DEBUG=ON -DCURL_CA_FALLBACK=ON + -DCURL_CA_PATH=none + -DCURL_CA_BUNDLE=none - ) - - vcpkg_install_cmake() + OPTIONS_DEBUG + ${EXTRA_ARGS_DEBUG} + OPTIONS_RELEASE diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake -index 75dd13302..84345c7ca 100644 +index 75dd133027..84345c7caa 100644 --- a/ports/snappy/portfile.cmake +++ b/ports/snappy/portfile.cmake @@ -4,6 +4,7 @@ vcpkg_from_github( @@ -36,11 +36,11 @@ index 75dd13302..84345c7ca 100644 HEAD_REF master + PATCHES "snappy-disable-bmi.patch" ) - + vcpkg_configure_cmake( diff --git a/ports/snappy/snappy-disable-bmi.patch 
b/ports/snappy/snappy-disable-bmi.patch new file mode 100644 -index 000000000..2cbb1533a +index 0000000000..2cbb1533a8 --- /dev/null +++ b/ports/snappy/snappy-disable-bmi.patch @@ -0,0 +1,17 @@ diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake new file mode 100644 index 00000000000..706ac47a72c --- /dev/null +++ b/ci/vcpkg/universal2-osx-static-debug.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64") +set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13") + +set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake new file mode 100644 index 00000000000..8670690171e --- /dev/null +++ b/ci/vcpkg/universal2-osx-static-release.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64") +set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13") + +set(VCPKG_BUILD_TYPE release) diff --git a/cmake-format.py b/cmake-format.py index 0976642031f..3e77733f4d1 100644 --- a/cmake-format.py +++ b/cmake-format.py @@ -16,44 +16,61 @@ # under the License. # cmake-format configuration file -# Use run-cmake-format.py to reformat all cmake files in the source tree +# Use `archery lint --cmake-format --fix` to reformat all cmake files in the +# source tree -# How wide to allow formatted cmake files -line_width = 90 +# ----------------------------- +# Options affecting formatting. +# ----------------------------- +with section("format"): + # How wide to allow formatted cmake files + line_width = 90 -# How many spaces to tab for indent -tab_size = 2 + # How many spaces to tab for indent + tab_size = 2 -# If arglists are longer than this, break them always -max_subargs_per_line = 4 + # If a positional argument group contains more than this many arguments, + # then force it to a vertical layout. 
+ max_pargs_hwrap = 4 -# If true, separate flow control names from their parentheses with a space -separate_ctrl_name_with_space = False + # If the statement spelling length (including space and parenthesis) is + # smaller than this amount, then force reject nested layouts. + # This value only comes into play when considering whether or not to nest + # arguments below their parent. If the number of characters in the parent + # is less than this value, we will not nest. + min_prefix_chars = 32 -# If true, separate function names from parentheses with a space -separate_fn_name_with_space = False + # If true, separate flow control names from their parentheses with a space + separate_ctrl_name_with_space = False -# If a statement is wrapped to more than one line, than dangle the closing -# parenthesis on it's own line -dangle_parens = False + # If true, separate function names from parentheses with a space + separate_fn_name_with_space = False -# What style line endings to use in the output. -line_ending = 'unix' + # If a statement is wrapped to more than one line, than dangle the closing + # parenthesis on it's own line + dangle_parens = False -# Format command names consistently as 'lower' or 'upper' case -command_case = 'lower' + # What style line endings to use in the output. + line_ending = 'unix' -# Format keywords consistently as 'lower' or 'upper' case -keyword_case = 'unchanged' + # Format command names consistently as 'lower' or 'upper' case + command_case = 'lower' -# enable comment markup parsing and reflow -enable_markup = False + # Format keywords consistently as 'lower' or 'upper' case + keyword_case = 'unchanged' -# If comment markup is enabled, don't reflow the first comment block in -# eachlistfile. Use this to preserve formatting of your -# copyright/licensestatements. -first_comment_is_literal = False +# ------------------------------------------------ +# Options affecting comment reflow and formatting. 
+# ------------------------------------------------ +with section("markup"): + # enable comment markup parsing and reflow + enable_markup = False -# If comment markup is enabled, don't reflow any comment block which matchesthis -# (regex) pattern. Default is `None` (disabled). -literal_comment_pattern = None + # If comment markup is enabled, don't reflow the first comment block in + # eachlistfile. Use this to preserve formatting of your + # copyright/licensestatements. + first_comment_is_literal = True + + # If comment markup is enabled, don't reflow any comment block which + # matchesthis (regex) pattern. Default is `None` (disabled). + literal_comment_pattern = None diff --git a/cpp/Brewfile b/cpp/Brewfile index 7de6c7deabe..78ee5e64c8f 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -35,9 +35,6 @@ brew "openssl@1.1" brew "protobuf" brew "python" brew "rapidjson" -# grpc bundles re2 and causes a conflict when Homebrew tries to install it, -# so temporarily skip installing re2. See ARROW-9972. 
-# brew "re2" brew "snappy" brew "thrift" brew "wget" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1705e854fb1..2bcdc0de179 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -47,13 +47,15 @@ if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() -set(ARROW_VERSION "4.0.0-SNAPSHOT") +set(ARROW_VERSION "6.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") # if no build build type is specified, default to release builds if(NOT DEFINED CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.") + set(CMAKE_BUILD_TYPE + Release + CACHE STRING "Choose the type of build.") endif() string(TOLOWER ${CMAKE_BUILD_TYPE} LOWERCASE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) @@ -109,6 +111,7 @@ set(ARROW_CMAKE_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") set(ARROW_LLVM_VERSIONS + "12.0" "11.1" "11.0" "10" @@ -116,18 +119,15 @@ set(ARROW_LLVM_VERSIONS "8" "7") list(GET ARROW_LLVM_VERSIONS 0 ARROW_LLVM_VERSION_PRIMARY) -string(REGEX - REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_LLVM_VERSION_PRIMARY_MAJOR - "${ARROW_LLVM_VERSION_PRIMARY}") +string(REGEX REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_LLVM_VERSION_PRIMARY_MAJOR + "${ARROW_LLVM_VERSION_PRIMARY}") file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../.env ARROW_ENV) string(REGEX MATCH "CLANG_TOOLS=[^\n]+" ARROW_ENV_CLANG_TOOLS_VERSION "${ARROW_ENV}") -string(REGEX - REPLACE "^CLANG_TOOLS=" "" ARROW_CLANG_TOOLS_VERSION - "${ARROW_ENV_CLANG_TOOLS_VERSION}") -string(REGEX - REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_CLANG_TOOLS_VERSION_MAJOR - "${ARROW_CLANG_TOOLS_VERSION}") +string(REGEX REPLACE "^CLANG_TOOLS=" "" ARROW_CLANG_TOOLS_VERSION + "${ARROW_ENV_CLANG_TOOLS_VERSION}") +string(REGEX REPLACE "^([0-9]+)(\\..+)?" 
"\\1" ARROW_CLANG_TOOLS_VERSION_MAJOR + "${ARROW_CLANG_TOOLS_VERSION}") if(APPLE) find_program(BREW_BIN brew) @@ -162,7 +162,9 @@ endif() find_package(ClangTools) find_package(InferTools) -if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND OR INFER_FOUND) +if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" + OR CLANG_TIDY_FOUND + OR INFER_FOUND) # Generate a Clang compile_commands.json "compilation database" file for use # with various development tools, such as Vim's YouCompleteMe plugin. # See http://clang.llvm.org/docs/JSONCompilationDatabase.html @@ -225,7 +227,9 @@ if(NOT LINT_EXCLUSIONS_FILE) set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt) endif() -find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR}) +find_program(CPPLINT_BIN + NAMES cpplint cpplint.py + HINTS ${BUILD_SUPPORT_DIR}) message(STATUS "Found cpplint executable at ${CPPLINT_BIN}") add_custom_target(lint @@ -270,7 +274,7 @@ if(${CLANG_FORMAT_FOUND}) endif() add_custom_target(lint_cpp_cli ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/lint_cpp_cli.py - ${CMAKE_CURRENT_SOURCE_DIR}/src) + ${CMAKE_CURRENT_SOURCE_DIR}/src) if(ARROW_LINT_ONLY) message("ARROW_LINT_ONLY was specified, this is only a partial build directory") @@ -342,6 +346,10 @@ if(ARROW_CUDA set(ARROW_IPC ON) endif() +if(ARROW_ENGINE) + set(ARROW_COMPUTE ON) +endif() + if(ARROW_DATASET) set(ARROW_COMPUTE ON) set(ARROW_FILESYSTEM ON) @@ -445,6 +453,26 @@ endif() include(SetupCxxFlags) +# +# Linker flags +# + +# Localize thirdparty symbols using a linker version script. This hides them +# from the client application. The OS X linker does not support the +# version-script option. 
+if(CMAKE_VERSION VERSION_LESS 3.18) + if(APPLE OR WIN32) + set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT FALSE) + else() + set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT TRUE) + endif() +else() + include(CheckLinkerFlag) + check_linker_flag(CXX + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map" + CXX_LINKER_SUPPORTS_VERSION_SCRIPT) +endif() + # # Build output directory # @@ -464,10 +492,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) if(NOT APPLE) set(MORE_ARGS "-T") endif() - execute_process(COMMAND ln - ${MORE_ARGS} - -sf - ${BUILD_OUTPUT_ROOT_DIRECTORY} + execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY} ${CMAKE_CURRENT_BINARY_DIR}/build/latest) else() set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/") @@ -502,6 +527,11 @@ endif() include(BuildUtils) enable_testing() +# For arrow.pc. Requires.private and Libs.private are used when +# "pkg-config --libs --static arrow" is used. +set(ARROW_PC_REQUIRES_PRIVATE) +set(ARROW_PC_LIBS_PRIVATE) + include(ThirdpartyToolchain) # Add common flags @@ -545,12 +575,9 @@ include_directories(src/generated) # if(PARQUET_BUILD_SHARED) set_target_properties(arrow_shared - PROPERTIES C_VISIBILITY_PRESET - hidden - CXX_VISIBILITY_PRESET - hidden - VISIBILITY_INLINES_HIDDEN - 1) + PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1) endif() # @@ -594,7 +621,9 @@ endif(UNIX) # "make cscope" target # if(UNIX) - add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR} + add_custom_target(cscope + find + ${CMAKE_CURRENT_SOURCE_DIR} (-name \\*.cc -or @@ -631,23 +660,14 @@ endif(UNIX) if(${INFER_FOUND}) # runs infer capture - add_custom_target(infer - ${BUILD_SUPPORT_DIR}/run-infer.sh - ${INFER_BIN} - ${CMAKE_BINARY_DIR}/compile_commands.json - 1) + add_custom_target(infer ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN} + ${CMAKE_BINARY_DIR}/compile_commands.json 1) # runs infer analyze - 
add_custom_target(infer-analyze - ${BUILD_SUPPORT_DIR}/run-infer.sh - ${INFER_BIN} - ${CMAKE_BINARY_DIR}/compile_commands.json - 2) + add_custom_target(infer-analyze ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN} + ${CMAKE_BINARY_DIR}/compile_commands.json 2) # runs infer report - add_custom_target(infer-report - ${BUILD_SUPPORT_DIR}/run-infer.sh - ${INFER_BIN} - ${CMAKE_BINARY_DIR}/compile_commands.json - 3) + add_custom_target(infer-report ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN} + ${CMAKE_BINARY_DIR}/compile_commands.json 3) endif() # @@ -716,7 +736,7 @@ if(ARROW_ORC) list(APPEND ARROW_STATIC_LINK_LIBS orc::liborc ${ARROW_PROTOBUF_LIBPROTOBUF}) if(ORC_SOURCE STREQUAL "SYSTEM") list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::liborc - ${ARROW_PROTOBUF_LIBPROTOBUF}) + ${ARROW_PROTOBUF_LIBPROTOBUF}) endif() endif() @@ -860,8 +880,9 @@ endif() set(ARROW_SYSTEM_LINK_LIBS) -if(THREADS_FOUND) - list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads) +list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads) +if(CMAKE_THREAD_LIBS_INIT) + string(APPEND ARROW_PC_LIBS_PRIVATE " ${CMAKE_THREAD_LIBS_INIT}") endif() if(WIN32) @@ -911,8 +932,7 @@ endif() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE.txt ${CMAKE_CURRENT_SOURCE_DIR}/../NOTICE.txt - ${CMAKE_CURRENT_SOURCE_DIR}/README.md - DESTINATION "${ARROW_DOC_DIR}") + ${CMAKE_CURRENT_SOURCE_DIR}/README.md DESTINATION "${ARROW_DOC_DIR}") # # Validate and print out Arrow configuration options diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index f6b782276e3..d8b0928ed3c 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -2170,6 +2170,7 @@ PREDEFINED = __attribute__(x)= \ __declspec(x)= \ PARQUET_EXPORT= \ ARROW_EXPORT= \ + ARROW_DS_EXPORT= \ ARROW_FLIGHT_EXPORT= \ ARROW_EXTERN_TEMPLATE= \ ARROW_DEPRECATED(x)= diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 2fd897b5d1d..cd8290d1bbb 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ 
b/cpp/cmake_modules/BuildUtils.cmake @@ -62,17 +62,16 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) if(ARG_STATIC_LIB AND ARG_SHARED_LIB) set(AUG_LIB_NAME "${LIB_NAME}_static") add_library(${AUG_LIB_NAME} STATIC IMPORTED) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION + "${ARG_STATIC_LIB}") if(ARG_DEPS) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES + "${ARG_DEPS}") endif() message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}") if(ARG_INCLUDE_DIRECTORIES) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${ARG_INCLUDE_DIRECTORIES}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${ARG_INCLUDE_DIRECTORIES}") endif() set(AUG_LIB_NAME "${LIB_NAME}_shared") @@ -80,36 +79,34 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) if(WIN32) # Mark the ".lib" location as part of a Windows DLL - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_IMPLIB + "${ARG_SHARED_LIB}") else() - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION + "${ARG_SHARED_LIB}") endif() if(ARG_DEPS) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES + "${ARG_DEPS}") endif() message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}") if(ARG_INCLUDE_DIRECTORIES) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${ARG_INCLUDE_DIRECTORIES}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES 
INTERFACE_INCLUDE_DIRECTORIES + "${ARG_INCLUDE_DIRECTORIES}") endif() elseif(ARG_STATIC_LIB) set(AUG_LIB_NAME "${LIB_NAME}_static") add_library(${AUG_LIB_NAME} STATIC IMPORTED) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION + "${ARG_STATIC_LIB}") if(ARG_DEPS) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES + "${ARG_DEPS}") endif() message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}") if(ARG_INCLUDE_DIRECTORIES) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${ARG_INCLUDE_DIRECTORIES}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${ARG_INCLUDE_DIRECTORIES}") endif() elseif(ARG_SHARED_LIB) set(AUG_LIB_NAME "${LIB_NAME}_shared") @@ -117,21 +114,20 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) if(WIN32) # Mark the ".lib" location as part of a Windows DLL - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_IMPLIB + "${ARG_SHARED_LIB}") else() - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION + "${ARG_SHARED_LIB}") endif() message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}") if(ARG_DEPS) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES + "${ARG_DEPS}") endif() if(ARG_INCLUDE_DIRECTORIES) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${ARG_INCLUDE_DIRECTORIES}") + set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + 
"${ARG_INCLUDE_DIRECTORIES}") endif() else() message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") @@ -159,10 +155,9 @@ function(create_merged_static_lib output_target) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") endif() - set( - output_lib_path - ${BUILD_OUTPUT_ROOT_DIRECTORY}${CMAKE_STATIC_LIBRARY_PREFIX}${ARG_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} - ) + set(output_lib_path + ${BUILD_OUTPUT_ROOT_DIRECTORY}${CMAKE_STATIC_LIBRARY_PREFIX}${ARG_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} + ) set(all_library_paths $) foreach(lib ${ARG_TO_MERGE}) @@ -170,13 +165,8 @@ function(create_merged_static_lib output_target) endforeach() if(APPLE) - set(BUNDLE_COMMAND - "libtool" - "-no_warning_for_no_symbols" - "-static" - "-o" - ${output_lib_path} - ${all_library_paths}) + set(BUNDLE_COMMAND "libtool" "-no_warning_for_no_symbols" "-static" "-o" + ${output_lib_path} ${all_library_paths}) elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel)$") set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar) @@ -188,7 +178,9 @@ function(create_merged_static_lib output_target) endforeach() file(APPEND ${ar_script_path}.in "SAVE\nEND\n") - file(GENERATE OUTPUT ${ar_script_path} INPUT ${ar_script_path}.in) + file(GENERATE + OUTPUT ${ar_script_path} + INPUT ${ar_script_path}.in) set(ar_tool ${CMAKE_AR}) if(CMAKE_INTERPROCEDURAL_OPTIMIZATION) @@ -218,9 +210,8 @@ function(create_merged_static_lib output_target) COMMENT "Bundling ${output_lib_path}" VERBATIM) - message( - STATUS "Creating bundled static library target ${output_target} at ${output_lib_path}" - ) + message(STATUS "Creating bundled static library target ${output_target} at ${output_lib_path}" + ) add_custom_target(${output_target} ALL DEPENDS ${output_lib_path}) add_dependencies(${output_target} ${ARG_ROOT} ${ARG_TO_MERGE}) @@ -355,7 +346,9 @@ function(ADD_ARROW_LIB LIB_NAME) endif() # On iOS, specifying -undefined conflicts with enabling bitcode - if(APPLE AND NOT IOS AND NOT 
DEFINED ENV{EMSCRIPTEN}) + if(APPLE + AND NOT IOS + AND NOT DEFINED ENV{EMSCRIPTEN}) # On OS X, you can avoid linking at library load time and instead # expecting that the symbols have been loaded separately. This happens # with libpython* where there can be conflicts between system Python and @@ -367,20 +360,13 @@ function(ADD_ARROW_LIB LIB_NAME) endif() set_target_properties(${LIB_NAME}_shared - PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "${OUTPUT_PATH}" - RUNTIME_OUTPUT_DIRECTORY - "${OUTPUT_PATH}" - PDB_OUTPUT_DIRECTORY - "${OUTPUT_PATH}" - LINK_FLAGS - "${ARG_SHARED_LINK_FLAGS}" - OUTPUT_NAME - ${LIB_NAME} - VERSION - "${ARROW_FULL_SO_VERSION}" - SOVERSION - "${ARROW_SO_VERSION}") + PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}" + RUNTIME_OUTPUT_DIRECTORY "${OUTPUT_PATH}" + PDB_OUTPUT_DIRECTORY "${OUTPUT_PATH}" + LINK_FLAGS "${ARG_SHARED_LINK_FLAGS}" + OUTPUT_NAME ${LIB_NAME} + VERSION "${ARROW_FULL_SO_VERSION}" + SOVERSION "${ARROW_SO_VERSION}") target_link_libraries(${LIB_NAME}_shared LINK_PUBLIC @@ -395,8 +381,8 @@ function(ADD_ARROW_LIB LIB_NAME) else() set(_lib_install_rpath "\$ORIGIN") endif() - set_target_properties(${LIB_NAME}_shared - PROPERTIES INSTALL_RPATH ${_lib_install_rpath}) + set_target_properties(${LIB_NAME}_shared PROPERTIES INSTALL_RPATH + ${_lib_install_rpath}) endif() if(APPLE) @@ -407,7 +393,7 @@ function(ADD_ARROW_LIB LIB_NAME) endif() set_target_properties(${LIB_NAME}_shared PROPERTIES BUILD_WITH_INSTALL_RPATH ON INSTALL_NAME_DIR - "${_lib_install_name}") + "${_lib_install_name}") endif() install(TARGETS ${LIB_NAME}_shared ${INSTALL_IS_OPTIONAL} @@ -415,7 +401,8 @@ function(ADD_ARROW_LIB LIB_NAME) RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if(BUILD_STATIC) @@ -451,8 +438,8 @@ function(ADD_ARROW_LIB LIB_NAME) endif() 
set_target_properties(${LIB_NAME}_static - PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}" OUTPUT_NAME - ${LIB_NAME_STATIC}) + PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}" + OUTPUT_NAME ${LIB_NAME_STATIC}) if(ARG_STATIC_INSTALL_INTERFACE_LIBS) target_link_libraries(${LIB_NAME}_static LINK_PUBLIC @@ -469,7 +456,8 @@ function(ADD_ARROW_LIB LIB_NAME) RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if(ARG_CMAKE_PACKAGE_NAME) @@ -488,9 +476,10 @@ function(ADD_ARROW_LIB LIB_NAME) set(CONFIG_VERSION_CMAKE "${ARG_CMAKE_PACKAGE_NAME}ConfigVersion.cmake") set(BUILT_CONFIG_VERSION_CMAKE "${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_VERSION_CMAKE}") - write_basic_package_version_file("${BUILT_CONFIG_VERSION_CMAKE}" - VERSION ${${PROJECT_NAME}_VERSION} - COMPATIBILITY AnyNewerVersion) + write_basic_package_version_file( + "${BUILT_CONFIG_VERSION_CMAKE}" + VERSION ${${PROJECT_NAME}_VERSION} + COMPATIBILITY AnyNewerVersion) install(FILES "${BUILT_CONFIG_VERSION_CMAKE}" DESTINATION "${ARROW_CMAKE_INSTALL_DIR}") endif() @@ -501,7 +490,9 @@ function(ADD_ARROW_LIB LIB_NAME) # Modify variable in calling scope if(ARG_OUTPUTS) - set(${ARG_OUTPUTS} ${${ARG_OUTPUTS}} PARENT_SCOPE) + set(${ARG_OUTPUTS} + ${${ARG_OUTPUTS}} + PARENT_SCOPE) endif() endfunction() @@ -589,10 +580,8 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME) # installed there. 
if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "" AND APPLE) set_target_properties(${BENCHMARK_NAME} - PROPERTIES BUILD_WITH_INSTALL_RPATH - TRUE - INSTALL_RPATH_USE_LINK_PATH - TRUE + PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE INSTALL_RPATH "$ENV{CONDA_PREFIX}/lib;${EXECUTABLE_OUTPUT_PATH}") endif() @@ -619,7 +608,9 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME) benchmark ${BENCHMARK_PATH} ${NO_COLOR}) - set_property(TEST ${BENCHMARK_NAME} APPEND PROPERTY LABELS ${ARG_LABELS}) + set_property(TEST ${BENCHMARK_NAME} + APPEND + PROPERTY LABELS ${ARG_LABELS}) endfunction() # @@ -699,10 +690,8 @@ function(ADD_TEST_CASE REL_TEST_NAME) # installed there. if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "" AND APPLE) set_target_properties(${TEST_NAME} - PROPERTIES BUILD_WITH_INSTALL_RPATH - TRUE - INSTALL_RPATH_USE_LINK_PATH - TRUE + PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE INSTALL_RPATH "${EXECUTABLE_OUTPUT_PATH};$ENV{CONDA_PREFIX}/lib") endif() @@ -735,9 +724,10 @@ function(ADD_TEST_CASE REL_TEST_NAME) endif() if(ARROW_TEST_MEMCHECK AND NOT ARG_NO_VALGRIND) - add_test( - ${TEST_NAME} bash -c - "cd '${CMAKE_SOURCE_DIR}'; \ + add_test(${TEST_NAME} + bash + -c + "cd '${CMAKE_SOURCE_DIR}'; \ valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \ --num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \ --error-exitcode=1 ${TEST_PATH}") @@ -773,17 +763,16 @@ function(ADD_TEST_CASE REL_TEST_NAME) set(LABEL_TEST_NAME "test-${LABEL}") if(NOT TARGET ${LABEL_TEST_NAME}) add_custom_target(${LABEL_TEST_NAME} - ctest - -L - "${LABEL}" - --output-on-failure + ctest -L "${LABEL}" --output-on-failure USES_TERMINAL) endif() # ensure the test is (re)built before the LABEL test runs add_dependencies(${LABEL_TEST_NAME} ${TEST_NAME}) endforeach() - set_property(TEST ${TEST_NAME} APPEND PROPERTY LABELS ${LABELS}) + set_property(TEST ${TEST_NAME} + APPEND + PROPERTY LABELS ${LABELS}) endfunction() # @@ -896,8 
+885,8 @@ function(ADD_FUZZ_TARGET REL_FUZZING_NAME) add_executable(${FUZZING_NAME} "${REL_FUZZING_NAME}.cc") target_link_libraries(${FUZZING_NAME} ${LINK_LIBS}) target_compile_options(${FUZZING_NAME} PRIVATE ${FUZZ_LDFLAGS}) - set_target_properties(${FUZZING_NAME} - PROPERTIES LINK_FLAGS ${FUZZ_LDFLAGS} LABELS "fuzzing") + set_target_properties(${FUZZING_NAME} PROPERTIES LINK_FLAGS ${FUZZ_LDFLAGS} LABELS + "fuzzing") endfunction() function(ARROW_INSTALL_ALL_HEADERS PATH) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 0e92811da8c..e2a85a4aa55 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -33,7 +33,9 @@ endfunction() function(list_join lst glue out) if("${${lst}}" STREQUAL "") - set(${out} "" PARENT_SCOPE) + set(${out} + "" + PARENT_SCOPE) return() endif() @@ -42,7 +44,9 @@ function(list_join lst glue out) foreach(item ${${lst}}) set(joined "${joined}${glue}${item}") endforeach() - set(${out} ${joined} PARENT_SCOPE) + set(${out} + ${joined} + PARENT_SCOPE) endfunction() macro(define_option name description default) @@ -61,7 +65,9 @@ macro(define_option_string name description default) check_description_length(${name} ${description}) list_join(description "\n" multiline_description) - set(${name} ${default} CACHE STRING "${multiline_description}") + set(${name} + ${default} + CACHE STRING "${multiline_description}") list(APPEND "ARROW_${ARROW_OPTION_CATEGORY}_OPTION_NAMES" ${name}) set("${name}_OPTION_DESCRIPTION" ${description}) @@ -181,8 +187,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_ONLY_LINT "Only define the lint and check-format targets" OFF) - define_option(ARROW_VERBOSE_LINT "If off, 'quiet' flags will be passed to linting tools" - OFF) + define_option(ARROW_VERBOSE_LINT + "If off, 'quiet' flags will be passed to linting tools" OFF) define_option(ARROW_GENERATE_COVERAGE "Build with C++ code coverage enabled" OFF) 
@@ -320,7 +326,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_LZ4_USE_SHARED "Rely on lz4 shared libraries where relevant" ${ARROW_DEPENDENCY_USE_SHARED}) - define_option(ARROW_OPENSSL_USE_SHARED "Rely on OpenSSL shared libraries where relevant" + define_option(ARROW_OPENSSL_USE_SHARED + "Rely on OpenSSL shared libraries where relevant" ${ARROW_DEPENDENCY_USE_SHARED}) define_option(ARROW_PROTOBUF_USE_SHARED @@ -363,14 +370,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF) define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF) - define_option( - ARROW_WITH_UTF8PROC - "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON)" - ON) - define_option( - ARROW_WITH_RE2 - "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" - ON) + define_option(ARROW_WITH_UTF8PROC + "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON or ARROW_GANDIVA is ON)" + ON) + define_option(ARROW_WITH_RE2 + "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" + ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN) @@ -416,9 +421,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Depend only on Thirdparty headers to build libparquet.;\ Always OFF if building binaries" OFF) - define_option( - PARQUET_BUILD_EXECUTABLES - "Build the Parquet executable CLI tools. Requires static libraries to be built." OFF) + define_option(PARQUET_BUILD_EXECUTABLES + "Build the Parquet executable CLI tools. Requires static libraries to be built." + OFF) define_option(PARQUET_BUILD_EXAMPLES "Build the Parquet examples. Requires static libraries to be built." 
OFF) @@ -432,10 +437,9 @@ Always OFF if building binaries" OFF) define_option(ARROW_GANDIVA_JAVA "Build the Gandiva JNI wrappers" OFF) # ARROW-3860: Temporary workaround - define_option( - ARROW_GANDIVA_STATIC_LIBSTDCPP - "Include -static-libstdc++ -static-libgcc when linking with;Gandiva static libraries" - OFF) + define_option(ARROW_GANDIVA_STATIC_LIBSTDCPP + "Include -static-libstdc++ -static-libgcc when linking with;Gandiva static libraries" + OFF) define_option_string(ARROW_GANDIVA_PC_CXX_FLAGS "Compiler flags to append when pre-compiling Gandiva operations" @@ -450,7 +454,8 @@ Always OFF if building binaries" OFF) define_option(ARROW_OPTIONAL_INSTALL "If enabled install ONLY targets that have already been built. Please be;\ advised that if this is enabled 'install' will fail silently on components;\ -that have not been built" OFF) +that have not been built" + OFF) option(ARROW_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file" ON) @@ -465,9 +470,8 @@ macro(validate_config) set(value "${${name}}") if(possible_values) if(NOT "${value}" IN_LIST possible_values) - message( - FATAL_ERROR "Configuration option ${name} got invalid value '${value}'. " - "Allowed values: ${${name}_OPTION_ENUM}.") + message(FATAL_ERROR "Configuration option ${name} got invalid value '${value}'. 
" + "Allowed values: ${${name}_OPTION_ENUM}.") endif() endif() endforeach() @@ -486,8 +490,8 @@ macro(config_summary_message) message(STATUS " Source directory: ${CMAKE_CURRENT_SOURCE_DIR}") message(STATUS " Install prefix: ${CMAKE_INSTALL_PREFIX}") if(${CMAKE_EXPORT_COMPILE_COMMANDS}) - message( - STATUS " Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json") + message(STATUS " Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json" + ) endif() foreach(category ${ARROW_OPTION_CATEGORIES}) diff --git a/cpp/cmake_modules/FindArrow.cmake b/cpp/cmake_modules/FindArrow.cmake index 9c987665896..68024cc2760 100644 --- a/cpp/cmake_modules/FindArrow.cmake +++ b/cpp/cmake_modules/FindArrow.cmake @@ -50,11 +50,12 @@ set(ARROW_SEARCH_LIB_PATH_SUFFIXES) if(CMAKE_LIBRARY_ARCHITECTURE) list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}") endif() -list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES - "lib64" - "lib32" - "lib" - "bin") +list(APPEND + ARROW_SEARCH_LIB_PATH_SUFFIXES + "lib64" + "lib32" + "lib" + "bin") set(ARROW_CONFIG_SUFFIXES "_RELEASE" "_RELWITHDEBINFO" @@ -120,10 +121,9 @@ endfunction() # # -> ARROW_STATIC_LIBRARY_NAME=arrow.lib with MSVC on Windows # # -> ARROW_STATIC_LIBRARY_NAME=libarrow.dll.a with MinGW on Windows function(arrow_build_static_library_name output_variable base_name) - set( - ${output_variable} - "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - PARENT_SCOPE) + set(${output_variable} + "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) endfunction() # Internal function. 
@@ -138,9 +138,11 @@ endfunction() function(arrow_extract_macro_value output_variable macro_name header_content) string(REGEX MATCH "#define +${macro_name} +[^\r\n]+" macro_definition "${header_content}") - string(REGEX - REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value "${macro_definition}") - set(${output_variable} "${macro_value}" PARENT_SCOPE) + string(REGEX REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value + "${macro_definition}") + set(${output_variable} + "${macro_value}" + PARENT_SCOPE) endfunction() # Internal macro only for arrow_find_package. @@ -152,7 +154,9 @@ macro(arrow_find_package_home) PATH_SUFFIXES "include" NO_DEFAULT_PATH) set(include_dir "${${prefix}_include_dir}") - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) + set(${prefix}_INCLUDE_DIR + "${include_dir}" + PARENT_SCOPE) if(MSVC_TOOLCHAIN) set(CMAKE_SHARED_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES}) @@ -169,13 +173,15 @@ macro(arrow_find_package_home) set(CMAKE_SHARED_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL}) endif() set(shared_lib "${${prefix}_shared_lib}") - set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) + set(${prefix}_SHARED_LIB + "${shared_lib}" + PARENT_SCOPE) if(shared_lib) add_library(${target_shared} SHARED IMPORTED) set_target_properties(${target_shared} PROPERTIES IMPORTED_LOCATION "${shared_lib}") if(include_dir) - set_target_properties(${target_shared} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") + set_target_properties(${target_shared} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${include_dir}") endif() find_library(${prefix}_import_lib NAMES "${import_lib_name}" @@ -183,7 +189,9 @@ macro(arrow_find_package_home) PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} NO_DEFAULT_PATH) set(import_lib "${${prefix}_import_lib}") - set(${prefix}_IMPORT_LIB "${import_lib}" PARENT_SCOPE) + set(${prefix}_IMPORT_LIB + "${import_lib}" + PARENT_SCOPE) if(import_lib) set_target_properties(${target_shared} 
PROPERTIES IMPORTED_IMPLIB "${import_lib}") endif() @@ -195,13 +203,15 @@ macro(arrow_find_package_home) PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} NO_DEFAULT_PATH) set(static_lib "${${prefix}_static_lib}") - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + set(${prefix}_STATIC_LIB + "${static_lib}" + PARENT_SCOPE) if(static_lib) add_library(${target_static} STATIC IMPORTED) set_target_properties(${target_static} PROPERTIES IMPORTED_LOCATION "${static_lib}") if(include_dir) - set_target_properties(${target_static} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") + set_target_properties(${target_static} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${include_dir}") endif() endif() endmacro() @@ -212,7 +222,9 @@ endmacro() macro(arrow_find_package_cmake_package_configuration) find_package(${cmake_package_name} CONFIG) if(${cmake_package_name}_FOUND) - set(${prefix}_USE_CMAKE_PACKAGE_CONFIG TRUE PARENT_SCOPE) + set(${prefix}_USE_CMAKE_PACKAGE_CONFIG + TRUE + PARENT_SCOPE) if(TARGET ${target_shared}) foreach(suffix ${ARROW_CONFIG_SUFFIXES}) get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION${suffix}) @@ -221,10 +233,11 @@ macro(arrow_find_package_cmake_package_configuration) # libarrow.so.100.0.0 -> libarrow.so # Because ARROW_HOME and pkg-config approaches don't add # shared library version. 
- string(REGEX - REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1" shared_lib - "${shared_lib}") - set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) + string(REGEX REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1" + shared_lib "${shared_lib}") + set(${prefix}_SHARED_LIB + "${shared_lib}" + PARENT_SCOPE) break() endif() endforeach() @@ -233,7 +246,9 @@ macro(arrow_find_package_cmake_package_configuration) foreach(suffix ${ARROW_CONFIG_SUFFIXES}) get_target_property(static_lib ${target_static} IMPORTED_LOCATION${suffix}) if(static_lib) - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + set(${prefix}_STATIC_LIB + "${static_lib}" + PARENT_SCOPE) break() endif() endforeach() @@ -247,7 +262,9 @@ endmacro() macro(arrow_find_package_pkg_config) pkg_check_modules(${prefix}_PC ${pkg_config_name}) if(${prefix}_PC_FOUND) - set(${prefix}_USE_PKG_CONFIG TRUE PARENT_SCOPE) + set(${prefix}_USE_PKG_CONFIG + TRUE + PARENT_SCOPE) set(include_dir "${${prefix}_PC_INCLUDEDIR}") set(lib_dir "${${prefix}_PC_LIBDIR}") @@ -270,18 +287,21 @@ macro(arrow_find_package_pkg_config) rest_shared_lib_paths) endif() - set(${prefix}_VERSION "${${prefix}_PC_VERSION}" PARENT_SCOPE) - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) - set(${prefix}_SHARED_LIB "${first_shared_lib_path}" PARENT_SCOPE) + set(${prefix}_VERSION + "${${prefix}_PC_VERSION}" + PARENT_SCOPE) + set(${prefix}_INCLUDE_DIR + "${include_dir}" + PARENT_SCOPE) + set(${prefix}_SHARED_LIB + "${first_shared_lib_path}" + PARENT_SCOPE) add_library(${target_shared} SHARED IMPORTED) set_target_properties(${target_shared} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${include_dir}" - INTERFACE_LINK_LIBRARIES - "${rest_shared_lib_paths}" - IMPORTED_LOCATION - "${first_shared_lib_path}") + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}" + INTERFACE_LINK_LIBRARIES "${rest_shared_lib_paths}" + IMPORTED_LOCATION "${first_shared_lib_path}") get_target_property(shared_lib ${target_shared} 
IMPORTED_LOCATION) find_library(${prefix}_static_lib @@ -289,7 +309,9 @@ macro(arrow_find_package_pkg_config) PATHS "${lib_dir}" NO_DEFAULT_PATH) set(static_lib "${${prefix}_static_lib}") - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + set(${prefix}_STATIC_LIB + "${static_lib}" + PARENT_SCOPE) if(static_lib) add_library(${target_static} STATIC IMPORTED) set_target_properties(${target_static} @@ -315,7 +337,9 @@ function(arrow_find_package if(home) arrow_find_package_home() - set(${prefix}_FIND_APPROACH "HOME: ${home}" PARENT_SCOPE) + set(${prefix}_FIND_APPROACH + "HOME: ${home}" + PARENT_SCOPE) else() arrow_find_package_cmake_package_configuration() if(${cmake_package_name}_FOUND) @@ -324,7 +348,9 @@ function(arrow_find_package PARENT_SCOPE) else() arrow_find_package_pkg_config() - set(${prefix}_FIND_APPROACH "pkg-config: ${pkg_config_name}" PARENT_SCOPE) + set(${prefix}_FIND_APPROACH + "pkg-config: ${pkg_config_name}" + PARENT_SCOPE) endif() endif() @@ -336,7 +362,9 @@ function(arrow_find_package endif() endif() if(include_dir) - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) + set(${prefix}_INCLUDE_DIR + "${include_dir}" + PARENT_SCOPE) endif() if(shared_lib) @@ -346,9 +374,13 @@ function(arrow_find_package else() set(lib_dir NOTFOUND) endif() - set(${prefix}_LIB_DIR "${lib_dir}" PARENT_SCOPE) + set(${prefix}_LIB_DIR + "${lib_dir}" + PARENT_SCOPE) # For backward compatibility - set(${prefix}_LIBS "${lib_dir}" PARENT_SCOPE) + set(${prefix}_LIBS + "${lib_dir}" + PARENT_SCOPE) endfunction() if(NOT "$ENV{ARROW_HOME}" STREQUAL "") @@ -384,9 +416,8 @@ if(ARROW_HOME) string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_SO_VERSION "${ARROW_SO_VERSION_QUOTED}") arrow_extract_macro_value(ARROW_FULL_SO_VERSION_QUOTED "ARROW_FULL_SO_VERSION" "${ARROW_CONFIG_H_CONTENT}") - string(REGEX - REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION - "${ARROW_FULL_SO_VERSION_QUOTED}") + string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION + 
"${ARROW_FULL_SO_VERSION_QUOTED}") endif() else() if(ARROW_USE_CMAKE_PACKAGE_CONFIG) @@ -416,16 +447,13 @@ mark_as_advanced(ARROW_ABI_VERSION ARROW_VERSION_MINOR ARROW_VERSION_PATCH) -find_package_handle_standard_args(Arrow REQUIRED_VARS - # The first required variable is shown - # in the found message. So this list is - # not sorted alphabetically. - ARROW_INCLUDE_DIR - ARROW_LIB_DIR - ARROW_FULL_SO_VERSION - ARROW_SO_VERSION - VERSION_VAR - ARROW_VERSION) +find_package_handle_standard_args( + Arrow + REQUIRED_VARS # The first required variable is shown + # in the found message. So this list is + # not sorted alphabetically. + ARROW_INCLUDE_DIR ARROW_LIB_DIR ARROW_FULL_SO_VERSION ARROW_SO_VERSION + VERSION_VAR ARROW_VERSION) set(ARROW_FOUND ${Arrow_FOUND}) if(Arrow_FOUND AND NOT Arrow_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindArrowCUDA.cmake b/cpp/cmake_modules/FindArrowCUDA.cmake index 7bc2f5b745b..014386f3012 100644 --- a/cpp/cmake_modules/FindArrowCUDA.cmake +++ b/cpp/cmake_modules/FindArrowCUDA.cmake @@ -74,13 +74,10 @@ mark_as_advanced(ARROW_CUDA_IMPORT_LIB ARROW_CUDA_VERSION ARROW_CUDA_VERSION_MATCH) -find_package_handle_standard_args(ArrowCUDA - REQUIRED_VARS - ARROW_CUDA_INCLUDE_DIR - ARROW_CUDA_LIB_DIR - ARROW_CUDA_VERSION_MATCH - VERSION_VAR - ARROW_CUDA_VERSION) +find_package_handle_standard_args( + ArrowCUDA + REQUIRED_VARS ARROW_CUDA_INCLUDE_DIR ARROW_CUDA_LIB_DIR ARROW_CUDA_VERSION_MATCH + VERSION_VAR ARROW_CUDA_VERSION) set(ARROW_CUDA_FOUND ${ArrowCUDA_FOUND}) if(ArrowCUDA_FOUND AND NOT ArrowCUDA_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindArrowDataset.cmake b/cpp/cmake_modules/FindArrowDataset.cmake index d45fae6799b..fe74f247fc3 100644 --- a/cpp/cmake_modules/FindArrowDataset.cmake +++ b/cpp/cmake_modules/FindArrowDataset.cmake @@ -74,13 +74,11 @@ mark_as_advanced(ARROW_DATASET_IMPORT_LIB ARROW_DATASET_VERSION ARROW_DATASET_VERSION_MATCH) -find_package_handle_standard_args(ArrowDataset - REQUIRED_VARS - ARROW_DATASET_INCLUDE_DIR - 
ARROW_DATASET_LIB_DIR - ARROW_DATASET_VERSION_MATCH - VERSION_VAR - ARROW_DATASET_VERSION) +find_package_handle_standard_args( + ArrowDataset + REQUIRED_VARS ARROW_DATASET_INCLUDE_DIR ARROW_DATASET_LIB_DIR + ARROW_DATASET_VERSION_MATCH + VERSION_VAR ARROW_DATASET_VERSION) set(ARROW_DATASET_FOUND ${ArrowDataset_FOUND}) if(ArrowDataset_FOUND AND NOT ArrowDataset_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindArrowFlight.cmake b/cpp/cmake_modules/FindArrowFlight.cmake index 344c408995c..805a4ff3803 100644 --- a/cpp/cmake_modules/FindArrowFlight.cmake +++ b/cpp/cmake_modules/FindArrowFlight.cmake @@ -75,13 +75,10 @@ mark_as_advanced(ARROW_FLIGHT_IMPORT_LIB ARROW_FLIGHT_VERSION ARROW_FLIGHT_VERSION_MATCH) -find_package_handle_standard_args(ArrowFlight - REQUIRED_VARS - ARROW_FLIGHT_INCLUDE_DIR - ARROW_FLIGHT_LIB_DIR - ARROW_FLIGHT_VERSION_MATCH - VERSION_VAR - ARROW_FLIGHT_VERSION) +find_package_handle_standard_args( + ArrowFlight + REQUIRED_VARS ARROW_FLIGHT_INCLUDE_DIR ARROW_FLIGHT_LIB_DIR ARROW_FLIGHT_VERSION_MATCH + VERSION_VAR ARROW_FLIGHT_VERSION) set(ARROW_FLIGHT_FOUND ${ArrowFlight_FOUND}) if(ArrowFlight_FOUND AND NOT ArrowFlight_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindArrowFlightTesting.cmake b/cpp/cmake_modules/FindArrowFlightTesting.cmake index feb2790dfc6..c0756cf637c 100644 --- a/cpp/cmake_modules/FindArrowFlightTesting.cmake +++ b/cpp/cmake_modules/FindArrowFlightTesting.cmake @@ -79,25 +79,20 @@ mark_as_advanced(ARROW_FLIGHT_TESTING_IMPORT_LIB ARROW_FLIGHT_TESTING_VERSION ARROW_FLIGHT_TESTING_VERSION_MATCH) -find_package_handle_standard_args(ArrowFlightTesting - REQUIRED_VARS - ARROW_FLIGHT_TESTING_INCLUDE_DIR - ARROW_FLIGHT_TESTING_LIB_DIR - ARROW_FLIGHT_TESTING_VERSION_MATCH - VERSION_VAR - ARROW_FLIGHT_TESTING_VERSION) +find_package_handle_standard_args( + ArrowFlightTesting + REQUIRED_VARS ARROW_FLIGHT_TESTING_INCLUDE_DIR ARROW_FLIGHT_TESTING_LIB_DIR + ARROW_FLIGHT_TESTING_VERSION_MATCH + VERSION_VAR ARROW_FLIGHT_TESTING_VERSION) 
set(ARROW_FLIGHT_TESTING_FOUND ${ArrowFlightTesting_FOUND}) if(ArrowFlightTesting_FOUND AND NOT ArrowFlightTesting_FIND_QUIETLY) - message( - STATUS "Found the Arrow Flight testing by ${ARROW_FLIGHT_TESTING_FIND_APPROACH}") - message( - STATUS - "Found the Arrow Flight testing shared library: ${ARROW_FLIGHT_TESTING_SHARED_LIB}") - message( - STATUS - "Found the Arrow Flight testing import library: ${ARROW_FLIGHT_TESTING_IMPORT_LIB}") - message( - STATUS - "Found the Arrow Flight testing static library: ${ARROW_FLIGHT_TESTING_STATIC_LIB}") + message(STATUS "Found the Arrow Flight testing by ${ARROW_FLIGHT_TESTING_FIND_APPROACH}" + ) + message(STATUS "Found the Arrow Flight testing shared library: ${ARROW_FLIGHT_TESTING_SHARED_LIB}" + ) + message(STATUS "Found the Arrow Flight testing import library: ${ARROW_FLIGHT_TESTING_IMPORT_LIB}" + ) + message(STATUS "Found the Arrow Flight testing static library: ${ARROW_FLIGHT_TESTING_STATIC_LIB}" + ) endif() diff --git a/cpp/cmake_modules/FindArrowPython.cmake b/cpp/cmake_modules/FindArrowPython.cmake index 3d1280dff72..b503e6a9e02 100644 --- a/cpp/cmake_modules/FindArrowPython.cmake +++ b/cpp/cmake_modules/FindArrowPython.cmake @@ -73,13 +73,10 @@ mark_as_advanced(ARROW_PYTHON_IMPORT_LIB ARROW_PYTHON_VERSION ARROW_PYTHON_VERSION_MATCH) -find_package_handle_standard_args(ArrowPython - REQUIRED_VARS - ARROW_PYTHON_INCLUDE_DIR - ARROW_PYTHON_LIB_DIR - ARROW_PYTHON_VERSION_MATCH - VERSION_VAR - ARROW_PYTHON_VERSION) +find_package_handle_standard_args( + ArrowPython + REQUIRED_VARS ARROW_PYTHON_INCLUDE_DIR ARROW_PYTHON_LIB_DIR ARROW_PYTHON_VERSION_MATCH + VERSION_VAR ARROW_PYTHON_VERSION) set(ARROW_PYTHON_FOUND ${ArrowPython_FOUND}) if(ArrowPython_FOUND AND NOT ArrowPython_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindArrowPythonFlight.cmake b/cpp/cmake_modules/FindArrowPythonFlight.cmake index acb22c64231..3a639928ce5 100644 --- a/cpp/cmake_modules/FindArrowPythonFlight.cmake +++ 
b/cpp/cmake_modules/FindArrowPythonFlight.cmake @@ -76,24 +76,19 @@ mark_as_advanced(ARROW_PYTHON_FLIGHT_IMPORT_LIB ARROW_PYTHON_FLIGHT_VERSION ARROW_PYTHON_FLIGHT_VERSION_MATCH) -find_package_handle_standard_args(ArrowPythonFlight - REQUIRED_VARS - ARROW_PYTHON_FLIGHT_INCLUDE_DIR - ARROW_PYTHON_FLIGHT_LIB_DIR - ARROW_PYTHON_FLIGHT_VERSION_MATCH - VERSION_VAR - ARROW_PYTHON_FLIGHT_VERSION) +find_package_handle_standard_args( + ArrowPythonFlight + REQUIRED_VARS ARROW_PYTHON_FLIGHT_INCLUDE_DIR ARROW_PYTHON_FLIGHT_LIB_DIR + ARROW_PYTHON_FLIGHT_VERSION_MATCH + VERSION_VAR ARROW_PYTHON_FLIGHT_VERSION) set(ARROW_PYTHON_FLIGHT_FOUND ${ArrowPythonFlight_FOUND}) if(ArrowPythonFlight_FOUND AND NOT ArrowPythonFlight_FIND_QUIETLY) message(STATUS "Found the Arrow Python Flight by ${ARROW_PYTHON_FLIGHT_FIND_APPROACH}") - message( - STATUS - "Found the Arrow Python Flight shared library: ${ARROW_PYTHON_FLIGHT_SHARED_LIB}") - message( - STATUS - "Found the Arrow Python Flight import library: ${ARROW_PYTHON_FLIGHT_IMPORT_LIB}") - message( - STATUS - "Found the Arrow Python Flight static library: ${ARROW_PYTHON_FLIGHT_STATIC_LIB}") + message(STATUS "Found the Arrow Python Flight shared library: ${ARROW_PYTHON_FLIGHT_SHARED_LIB}" + ) + message(STATUS "Found the Arrow Python Flight import library: ${ARROW_PYTHON_FLIGHT_IMPORT_LIB}" + ) + message(STATUS "Found the Arrow Python Flight static library: ${ARROW_PYTHON_FLIGHT_STATIC_LIB}" + ) endif() diff --git a/cpp/cmake_modules/FindArrowTesting.cmake b/cpp/cmake_modules/FindArrowTesting.cmake index ed5a28cd3e4..c405003ad70 100644 --- a/cpp/cmake_modules/FindArrowTesting.cmake +++ b/cpp/cmake_modules/FindArrowTesting.cmake @@ -74,13 +74,11 @@ mark_as_advanced(ARROW_TESTING_IMPORT_LIB ARROW_TESTING_VERSION ARROW_TESTING_VERSION_MATCH) -find_package_handle_standard_args(ArrowTesting - REQUIRED_VARS - ARROW_TESTING_INCLUDE_DIR - ARROW_TESTING_LIB_DIR - ARROW_TESTING_VERSION_MATCH - VERSION_VAR - ARROW_TESTING_VERSION) 
+find_package_handle_standard_args( + ArrowTesting + REQUIRED_VARS ARROW_TESTING_INCLUDE_DIR ARROW_TESTING_LIB_DIR + ARROW_TESTING_VERSION_MATCH + VERSION_VAR ARROW_TESTING_VERSION) set(ARROW_TESTING_FOUND ${ArrowTesting_FOUND}) if(ArrowTesting_FOUND AND NOT ArrowTesting_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindBrotli.cmake b/cpp/cmake_modules/FindBrotli.cmake index b46a0f1a0cf..e2670b51a9e 100644 --- a/cpp/cmake_modules/FindBrotli.cmake +++ b/cpp/cmake_modules/FindBrotli.cmake @@ -110,12 +110,9 @@ else() endif() endif() -find_package_handle_standard_args(Brotli - REQUIRED_VARS - BROTLI_COMMON_LIBRARY - BROTLI_ENC_LIBRARY - BROTLI_DEC_LIBRARY - BROTLI_INCLUDE_DIR) +find_package_handle_standard_args( + Brotli REQUIRED_VARS BROTLI_COMMON_LIBRARY BROTLI_ENC_LIBRARY BROTLI_DEC_LIBRARY + BROTLI_INCLUDE_DIR) if(Brotli_FOUND OR BROTLI_FOUND) set(Brotli_FOUND TRUE) add_library(Brotli::brotlicommon UNKNOWN IMPORTED) diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake index 88171abed92..52fc59895b8 100644 --- a/cpp/cmake_modules/FindClangTools.cmake +++ b/cpp/cmake_modules/FindClangTools.cmake @@ -69,15 +69,18 @@ function(FIND_CLANG_TOOL NAME OUTPUT VERSION_CHECK_PATTERN) endif() endif() if(CLANG_TOOL_BIN) - set(${OUTPUT} ${CLANG_TOOL_BIN} PARENT_SCOPE) + set(${OUTPUT} + ${CLANG_TOOL_BIN} + PARENT_SCOPE) else() - set(${OUTPUT} "${OUTPUT}-NOTFOUND" PARENT_SCOPE) + set(${OUTPUT} + "${OUTPUT}-NOTFOUND" + PARENT_SCOPE) endif() endfunction() -string(REGEX - REPLACE "\\." "\\\\." ARROW_CLANG_TOOLS_VERSION_ESCAPED - "${ARROW_CLANG_TOOLS_VERSION}") +string(REGEX REPLACE "\\." "\\\\." 
ARROW_CLANG_TOOLS_VERSION_ESCAPED + "${ARROW_CLANG_TOOLS_VERSION}") find_clang_tool(clang-tidy CLANG_TIDY_BIN "LLVM version ${ARROW_CLANG_TOOLS_VERSION_ESCAPED}") @@ -100,4 +103,4 @@ else() endif() find_package_handle_standard_args(ClangTools REQUIRED_VARS CLANG_FORMAT_BIN - CLANG_TIDY_BIN) + CLANG_TIDY_BIN) diff --git a/cpp/cmake_modules/FindGLOG.cmake b/cpp/cmake_modules/FindGLOG.cmake index 81c3f2ec57e..d67eb005621 100644 --- a/cpp/cmake_modules/FindGLOG.cmake +++ b/cpp/cmake_modules/FindGLOG.cmake @@ -38,7 +38,9 @@ elseif(GLOG_ROOT) NO_DEFAULT_PATH PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) else() - find_library(GLOG_LIB NAMES glog PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) + find_library(GLOG_LIB + NAMES glog + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) find_path(GLOG_INCLUDE_DIR NAMES glog/logging.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) diff --git a/cpp/cmake_modules/FindGandiva.cmake b/cpp/cmake_modules/FindGandiva.cmake index 15279fd841a..c533abed733 100644 --- a/cpp/cmake_modules/FindGandiva.cmake +++ b/cpp/cmake_modules/FindGandiva.cmake @@ -79,14 +79,11 @@ mark_as_advanced(GANDIVA_ABI_VERSION GANDIVA_VERSION GANDIVA_VERSION_MATCH) -find_package_handle_standard_args(Gandiva - REQUIRED_VARS - GANDIVA_INCLUDE_DIR - GANDIVA_LIB_DIR - GANDIVA_SO_VERSION - GANDIVA_VERSION_MATCH - VERSION_VAR - GANDIVA_VERSION) +find_package_handle_standard_args( + Gandiva + REQUIRED_VARS GANDIVA_INCLUDE_DIR GANDIVA_LIB_DIR GANDIVA_SO_VERSION + GANDIVA_VERSION_MATCH + VERSION_VAR GANDIVA_VERSION) set(GANDIVA_FOUND ${Gandiva_FOUND}) if(Gandiva_FOUND AND NOT Gandiva_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 7695c09ae8c..380f2d47c72 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -58,22 +58,17 @@ if(LLVM_FOUND) add_library(LLVM::LLVM_INTERFACE INTERFACE IMPORTED) set_target_properties(LLVM::LLVM_INTERFACE - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - 
"${LLVM_INCLUDE_DIRS}" - INTERFACE_COMPILE_FLAGS - "${LLVM_DEFINITIONS}" - INTERFACE_LINK_LIBRARIES - "${LLVM_LIBS}") + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LLVM_INCLUDE_DIRS}" + INTERFACE_COMPILE_FLAGS "${LLVM_DEFINITIONS}" + INTERFACE_LINK_LIBRARIES "${LLVM_LIBS}") endif() mark_as_advanced(CLANG_EXECUTABLE LLVM_LINK_EXECUTABLE) -find_package_handle_standard_args(LLVMAlt - REQUIRED_VARS # The first variable is used for display. - LLVM_PACKAGE_VERSION - CLANG_EXECUTABLE - LLVM_FOUND - LLVM_LINK_EXECUTABLE) +find_package_handle_standard_args( + LLVMAlt + REQUIRED_VARS # The first variable is used for display. + LLVM_PACKAGE_VERSION CLANG_EXECUTABLE LLVM_FOUND LLVM_LINK_EXECUTABLE) if(LLVMAlt_FOUND) message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") message(STATUS "Found llvm-link ${LLVM_LINK_EXECUTABLE}") diff --git a/cpp/cmake_modules/FindLz4.cmake b/cpp/cmake_modules/FindLz4.cmake index 14b6d93b983..bc8051fe9c5 100644 --- a/cpp/cmake_modules/FindLz4.cmake +++ b/cpp/cmake_modules/FindLz4.cmake @@ -23,16 +23,13 @@ set(LZ4_LIB_NAME_BASE "${LZ4_MSVC_LIB_PREFIX}lz4") if(ARROW_LZ4_USE_SHARED) set(LZ4_LIB_NAMES) if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list( - APPEND - LZ4_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) + list(APPEND + LZ4_LIB_NAMES + "${CMAKE_IMPORT_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) endif() - list( - APPEND - LZ4_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}") + list(APPEND LZ4_LIB_NAMES + "${CMAKE_SHARED_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}") else() if(MSVC AND NOT DEFINED LZ4_MSVC_STATIC_LIB_SUFFIX) set(LZ4_MSVC_STATIC_LIB_SUFFIX "_static") @@ -70,7 +67,9 @@ else() find_library(LZ4_LIB NAMES ${LZ4_LIB_NAMES} PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_path(LZ4_INCLUDE_DIR NAMES lz4.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + find_path(LZ4_INCLUDE_DIR + NAMES lz4.h + 
PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) endif() endif() diff --git a/cpp/cmake_modules/FindORC.cmake b/cpp/cmake_modules/FindORC.cmake index 061a0df2e9e..d45b1607833 100644 --- a/cpp/cmake_modules/FindORC.cmake +++ b/cpp/cmake_modules/FindORC.cmake @@ -33,7 +33,9 @@ if(ORC_ROOT) NO_DEFAULT_PATH PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) else() - find_library(ORC_STATIC_LIB NAMES orc PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) + find_library(ORC_STATIC_LIB + NAMES orc + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) find_path(ORC_INCLUDE_DIR NAMES orc/orc-config.hh PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) diff --git a/cpp/cmake_modules/FindParquet.cmake b/cpp/cmake_modules/FindParquet.cmake index 99124b2c037..e071fc822b6 100644 --- a/cpp/cmake_modules/FindParquet.cmake +++ b/cpp/cmake_modules/FindParquet.cmake @@ -83,13 +83,12 @@ if(ARROW_FOUND) arrow_extract_macro_value(PARQUET_SO_VERSION_QUOTED "PARQUET_SO_VERSION" "${PARQUET_VERSION_H_CONTENT}") - string(REGEX - REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION "${PARQUET_SO_VERSION_QUOTED}") + string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION + "${PARQUET_SO_VERSION_QUOTED}") arrow_extract_macro_value(PARQUET_FULL_SO_VERSION_QUOTED "PARQUET_FULL_SO_VERSION" "${PARQUET_VERSION_H_CONTENT}") - string(REGEX - REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION - "${PARQUET_FULL_SO_VERSION_QUOTED}") + string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION + "${PARQUET_FULL_SO_VERSION_QUOTED}") endif() else() if(PARQUET_USE_CMAKE_PACKAGE_CONFIG) @@ -113,13 +112,10 @@ mark_as_advanced(PARQUET_ABI_VERSION PARQUET_STATIC_LIB PARQUET_VERSION) -find_package_handle_standard_args(Parquet - REQUIRED_VARS - PARQUET_INCLUDE_DIR - PARQUET_LIB_DIR - PARQUET_SO_VERSION - VERSION_VAR - PARQUET_VERSION) +find_package_handle_standard_args( + Parquet + REQUIRED_VARS PARQUET_INCLUDE_DIR PARQUET_LIB_DIR PARQUET_SO_VERSION + VERSION_VAR PARQUET_VERSION) set(PARQUET_FOUND ${Parquet_FOUND}) if(Parquet_FOUND AND 
NOT Parquet_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindPlasma.cmake b/cpp/cmake_modules/FindPlasma.cmake index d56b7141320..2e634844c59 100644 --- a/cpp/cmake_modules/FindPlasma.cmake +++ b/cpp/cmake_modules/FindPlasma.cmake @@ -87,14 +87,10 @@ mark_as_advanced(PLASMA_ABI_VERSION PLASMA_STORE_SERVER PLASMA_VERSION) -find_package_handle_standard_args(Plasma - REQUIRED_VARS - PLASMA_INCLUDE_DIR - PLASMA_LIB_DIR - PLASMA_SO_VERSION - PLASMA_STORE_SERVER - VERSION_VAR - PLASMA_VERSION) +find_package_handle_standard_args( + Plasma + REQUIRED_VARS PLASMA_INCLUDE_DIR PLASMA_LIB_DIR PLASMA_SO_VERSION PLASMA_STORE_SERVER + VERSION_VAR PLASMA_VERSION) set(PLASMA_FOUND ${Plasma_FOUND}) if(Plasma_FOUND AND NOT Plasma_FIND_QUIETLY) diff --git a/cpp/cmake_modules/FindPython3Alt.cmake b/cpp/cmake_modules/FindPython3Alt.cmake index 131a0d395fc..ab91c7be052 100644 --- a/cpp/cmake_modules/FindPython3Alt.cmake +++ b/cpp/cmake_modules/FindPython3Alt.cmake @@ -33,11 +33,8 @@ if(${CMAKE_VERSION} VERSION_LESS "3.15.0") find_package(PythonLibsNew) find_package(NumPy) endif() - find_package_handle_standard_args(Python3Alt - REQUIRED_VARS - PYTHON_EXECUTABLE - PYTHON_INCLUDE_DIRS - NUMPY_INCLUDE_DIRS) + find_package_handle_standard_args( + Python3Alt REQUIRED_VARS PYTHON_EXECUTABLE PYTHON_INCLUDE_DIRS NUMPY_INCLUDE_DIRS) return() endif() @@ -46,13 +43,17 @@ if(${CMAKE_VERSION} VERSION_LESS "3.18.0" OR ARROW_BUILD_TESTS) # the full "Development" component. Also ask for it on CMake < 3.18, # where "Development.Module" is not available. 
if(Python3Alt_FIND_REQUIRED) - find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) + find_package(Python3 + COMPONENTS Interpreter Development NumPy + REQUIRED) else() find_package(Python3 COMPONENTS Interpreter Development NumPy) endif() else() if(Python3Alt_FIND_REQUIRED) - find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED) + find_package(Python3 + COMPONENTS Interpreter Development.Module NumPy + REQUIRED) else() find_package(Python3 COMPONENTS Interpreter Development.Module NumPy) endif() @@ -72,12 +73,11 @@ get_target_property(NUMPY_INCLUDE_DIRS Python3::NumPy INTERFACE_INCLUDE_DIRECTOR # CMake's python3_add_library() doesn't apply the required extension suffix, # detect it ourselves. # (https://gitlab.kitware.com/cmake/cmake/issues/20408) -execute_process( - COMMAND "${PYTHON_EXECUTABLE}" "-c" - "from distutils import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))" - RESULT_VARIABLE _PYTHON_RESULT - OUTPUT_VARIABLE _PYTHON_STDOUT - ERROR_VARIABLE _PYTHON_STDERR) +execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" + "from distutils import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))" + RESULT_VARIABLE _PYTHON_RESULT + OUTPUT_VARIABLE _PYTHON_STDOUT + ERROR_VARIABLE _PYTHON_STDERR) if(NOT _PYTHON_RESULT MATCHES 0) if(Python3Alt_FIND_REQUIRED) @@ -92,8 +92,5 @@ function(PYTHON_ADD_MODULE name) set_target_properties(${name} PROPERTIES SUFFIX ${_EXT_SUFFIX}) endfunction() -find_package_handle_standard_args(Python3Alt - REQUIRED_VARS - PYTHON_EXECUTABLE - PYTHON_INCLUDE_DIRS - NUMPY_INCLUDE_DIRS) +find_package_handle_standard_args( + Python3Alt REQUIRED_VARS PYTHON_EXECUTABLE PYTHON_INCLUDE_DIRS NUMPY_INCLUDE_DIRS) diff --git a/cpp/cmake_modules/FindRapidJSONAlt.cmake b/cpp/cmake_modules/FindRapidJSONAlt.cmake index a967ef61a66..9a449a5280e 100644 --- a/cpp/cmake_modules/FindRapidJSONAlt.cmake +++ b/cpp/cmake_modules/FindRapidJSONAlt.cmake @@ -36,39 +36,37 @@ if(RapidJSON_ROOT) NO_DEFAULT_PATH 
PATH_SUFFIXES "include") else() - find_path(RAPIDJSON_INCLUDE_DIR NAMES rapidjson/rapidjson.h PATH_SUFFIXES "include") + find_path(RAPIDJSON_INCLUDE_DIR + NAMES rapidjson/rapidjson.h + PATH_SUFFIXES "include") endif() if(RAPIDJSON_INCLUDE_DIR) file(READ "${RAPIDJSON_INCLUDE_DIR}/rapidjson/rapidjson.h" RAPIDJSON_H_CONTENT) string(REGEX MATCH "#define RAPIDJSON_MAJOR_VERSION ([0-9]+)" RAPIDJSON_MAJOR_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}") - string(REGEX - REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MAJOR_VERSION - "${RAPIDJSON_MAJOR_VERSION_DEFINITION}") + string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MAJOR_VERSION + "${RAPIDJSON_MAJOR_VERSION_DEFINITION}") string(REGEX MATCH "#define RAPIDJSON_MINOR_VERSION ([0-9]+)" RAPIDJSON_MINOR_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}") - string(REGEX - REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MINOR_VERSION - "${RAPIDJSON_MINOR_VERSION_DEFINITION}") + string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MINOR_VERSION + "${RAPIDJSON_MINOR_VERSION_DEFINITION}") string(REGEX MATCH "#define RAPIDJSON_PATCH_VERSION ([0-9]+)" RAPIDJSON_PATCH_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}") - string(REGEX - REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_PATCH_VERSION - "${RAPIDJSON_PATCH_VERSION_DEFINITION}") + string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_PATCH_VERSION + "${RAPIDJSON_PATCH_VERSION_DEFINITION}") if("${RAPIDJSON_MAJOR_VERSION}" STREQUAL "" OR "${RAPIDJSON_MINOR_VERSION}" STREQUAL "" OR "${RAPIDJSON_PATCH_VERSION}" STREQUAL "") set(RAPIDJSON_VERSION "0.0.0") else() - set( - RAPIDJSON_VERSION - "${RAPIDJSON_MAJOR_VERSION}.${RAPIDJSON_MINOR_VERSION}.${RAPIDJSON_PATCH_VERSION}") + set(RAPIDJSON_VERSION + "${RAPIDJSON_MAJOR_VERSION}.${RAPIDJSON_MINOR_VERSION}.${RAPIDJSON_PATCH_VERSION}" + ) endif() endif() -find_package_handle_standard_args(RapidJSONAlt - REQUIRED_VARS - RAPIDJSON_INCLUDE_DIR - VERSION_VAR - RAPIDJSON_VERSION) +find_package_handle_standard_args( + RapidJSONAlt + REQUIRED_VARS RAPIDJSON_INCLUDE_DIR + 
VERSION_VAR RAPIDJSON_VERSION) diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake index 26cccb786c5..747df31854d 100644 --- a/cpp/cmake_modules/FindSnappy.cmake +++ b/cpp/cmake_modules/FindSnappy.cmake @@ -19,20 +19,19 @@ if(ARROW_SNAPPY_USE_SHARED) set(SNAPPY_LIB_NAMES) if(CMAKE_IMPORT_LIBRARY_SUFFIX) list(APPEND SNAPPY_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}snappy${CMAKE_IMPORT_LIBRARY_SUFFIX}") + "${CMAKE_IMPORT_LIBRARY_PREFIX}snappy${CMAKE_IMPORT_LIBRARY_SUFFIX}") endif() list(APPEND SNAPPY_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}") + "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}") else() set(SNAPPY_STATIC_LIB_NAME_BASE "snappy") if(MSVC) set(SNAPPY_STATIC_LIB_NAME_BASE "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") endif() - set( - SNAPPY_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(SNAPPY_LIB_NAMES + "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) endif() if(Snappy_ROOT) diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake index 273d907ed07..750d8ce8341 100644 --- a/cpp/cmake_modules/FindThrift.cmake +++ b/cpp/cmake_modules/FindThrift.cmake @@ -33,9 +33,13 @@ function(EXTRACT_THRIFT_VERSION) string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION "${THRIFT_CONFIG_H_CONTENT}") string(REGEX MATCH "[0-9.]+" THRIFT_VERSION "${THRIFT_VERSION_DEFINITION}") - set(THRIFT_VERSION "${THRIFT_VERSION}" PARENT_SCOPE) + set(THRIFT_VERSION + "${THRIFT_VERSION}" + PARENT_SCOPE) else() - set(THRIFT_VERSION "" PARENT_SCOPE) + set(THRIFT_VERSION + "" + PARENT_SCOPE) endif() endfunction(EXTRACT_THRIFT_VERSION) @@ -53,21 +57,19 @@ set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}") if(ARROW_THRIFT_USE_SHARED) set(THRIFT_LIB_NAMES thrift) if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list( - 
APPEND - THRIFT_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - endif() - list( - APPEND - THRIFT_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" + list(APPEND + THRIFT_LIB_NAMES + "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" ) + endif() + list(APPEND + THRIFT_LIB_NAMES + "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) else() - set( - THRIFT_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(THRIFT_LIB_NAMES + "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) endif() if(Thrift_ROOT) @@ -78,7 +80,9 @@ if(Thrift_ROOT) find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATHS ${Thrift_ROOT} PATH_SUFFIXES "include") - find_program(THRIFT_COMPILER thrift PATHS ${Thrift_ROOT} PATH_SUFFIXES "bin") + find_program(THRIFT_COMPILER thrift + PATHS ${Thrift_ROOT} + PATH_SUFFIXES "bin") extract_thrift_version() else() # THRIFT-4760: The pkgconfig files are currently only installed when using autotools. @@ -115,13 +119,11 @@ else() set(Thrift_COMPILER_FOUND FALSE) endif() -find_package_handle_standard_args(Thrift - REQUIRED_VARS - THRIFT_LIB - THRIFT_INCLUDE_DIR - VERSION_VAR - THRIFT_VERSION - HANDLE_COMPONENTS) +find_package_handle_standard_args( + Thrift + REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR + VERSION_VAR THRIFT_VERSION + HANDLE_COMPONENTS) if(Thrift_FOUND OR THRIFT_FOUND) set(Thrift_FOUND TRUE) diff --git a/cpp/cmake_modules/Findc-aresAlt.cmake b/cpp/cmake_modules/Findc-aresAlt.cmake new file mode 100644 index 00000000000..5213e8d12a1 --- /dev/null +++ b/cpp/cmake_modules/Findc-aresAlt.cmake @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(find_package_args) +if(c-aresAlt_FIND_VERSION) + list(APPEND find_package_args ${c-aresAlt_FIND_VERSION}) +endif() +if(c-aresAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +find_package(c-ares ${find_package_args}) +if(c-ares_FOUND) + set(c-aresAlt_FOUND TRUE) + return() +endif() + +find_package(PkgConfig QUIET) +pkg_check_modules(c-ares_PC libcares) +if(c-ares_PC_FOUND) + set(c-ares_INCLUDE_DIR "${c-ares_PC_INCLUDEDIR}") + + list(APPEND c-ares_PC_LIBRARY_DIRS "${c-ares_PC_LIBDIR}") + find_library(c-ares_LIB cares + PATHS ${c-ares_PC_LIBRARY_DIRS} + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} + NO_DEFAULT_PATH) +elseif(c-ares_ROOT) + find_library(c-ares_LIB + NAMES cares + "${CMAKE_SHARED_LIBRARY_PREFIX}cares${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATHS ${c-ares_ROOT} + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} + NO_DEFAULT_PATH) + find_path(c-ares_INCLUDE_DIR + NAMES ares.h + PATHS ${c-ares_ROOT} + NO_DEFAULT_PATH + PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) +else() + find_library(c-ares_LIB + NAMES cares + "${CMAKE_SHARED_LIBRARY_PREFIX}cares${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) + find_path(c-ares_INCLUDE_DIR + NAMES ares.h + PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) +endif() + 
+find_package_handle_standard_args(c-aresAlt REQUIRED_VARS c-ares_LIB c-ares_INCLUDE_DIR) + +if(c-aresAlt_FOUND) + if(NOT TARGET c-ares::cares) + add_library(c-ares::cares UNKNOWN IMPORTED) + set_target_properties(c-ares::cares + PROPERTIES IMPORTED_LOCATION "${c-ares_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${c-ares_INCLUDE_DIR}") + endif() +endif() diff --git a/cpp/cmake_modules/FindgRPCAlt.cmake b/cpp/cmake_modules/FindgRPCAlt.cmake index 79fe01744d3..18b23f32269 100644 --- a/cpp/cmake_modules/FindgRPCAlt.cmake +++ b/cpp/cmake_modules/FindgRPCAlt.cmake @@ -24,224 +24,53 @@ if(gRPC_FOUND) return() endif() -unset(GRPC_ALT_VERSION) - -if(ARROW_GRPC_USE_SHARED) - set(GRPC_GPR_LIB_NAMES) - set(GRPC_GRPC_LIB_NAMES) - set(GRPC_GRPCPP_LIB_NAMES) - set(GRPC_ADDRESS_SORTING_LIB_NAMES) - set(GRPC_UPB_LIB_NAMES) - if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list(APPEND GRPC_GPR_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}gpr${CMAKE_IMPORT_LIBRARY_SUFFIX}") - list(APPEND GRPC_GRPC_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}grpc${CMAKE_IMPORT_LIBRARY_SUFFIX}") - list(APPEND GRPC_GRPCPP_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}grpc++${CMAKE_IMPORT_LIBRARY_SUFFIX}") - list( - APPEND GRPC_ADDRESS_SORTING_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}address_sorting${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - list(APPEND GRPC_UPB_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}upb${CMAKE_IMPORT_LIBRARY_SUFFIX}") +find_package(PkgConfig QUIET) +pkg_check_modules(GRPCPP_PC grpc++) +if(GRPCPP_PC_FOUND) + set(gRPCAlt_VERSION "${GRPCPP_PC_VERSION}") + set(GRPCPP_INCLUDE_DIRECTORIES ${GRPCPP_PC_INCLUDEDIR}) + if(ARROW_GRPC_USE_SHARED) + set(GRPCPP_LINK_LIBRARIES ${GRPCPP_PC_LINK_LIBRARIES}) + set(GRPCPP_LINK_OPTIONS ${GRPCPP_PC_LDFLAGS_OTHER}) + set(GRPCPP_COMPILE_OPTIONS ${GRPCPP_PC_CFLAGS_OTHER}) + else() + set(GRPCPP_LINK_LIBRARIES) + foreach(GRPCPP_LIBRARY_NAME ${GRPCPP_PC_STATIC_LIBRARIES}) + find_library(GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME} + NAMES 
"${CMAKE_STATIC_LIBRARY_PREFIX}${GRPCPP_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + HINTS ${GRPCPP_PC_STATIC_LIBRARY_DIRS}) + list(APPEND GRPCPP_LINK_LIBRARIES "${GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}}") + endforeach() + set(GRPCPP_LINK_OPTIONS ${GRPCPP_PC_STATIC_LDFLAGS_OTHER}) + set(GRPCPP_COMPILE_OPTIONS ${GRPCPP_PC_STATIC_CFLAGS_OTHER}) endif() - list(APPEND GRPC_GPR_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}gpr${CMAKE_SHARED_LIBRARY_SUFFIX}") - list(APPEND GRPC_GRPC_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}grpc${CMAKE_SHARED_LIBRARY_SUFFIX}") - list(APPEND GRPC_GRPCPP_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}grpc++${CMAKE_SHARED_LIBRARY_SUFFIX}") - list( - APPEND GRPC_ADDRESS_SORTING_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}address_sorting${CMAKE_SHARED_LIBRARY_SUFFIX}") - list(APPEND GRPC_UPB_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}upb${CMAKE_SHARED_LIBRARY_SUFFIX}") -else() - set(GRPC_GPR_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_GRPC_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_GRPCPP_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_ADDRESS_SORTING_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GRPC_UPB_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}") -endif() - -if(gRPC_ROOT) - find_library(GRPC_GPR_LIB - NAMES ${GRPC_GPR_LIB_NAMES} - PATHS ${gRPC_ROOT} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_GRPC_LIB - NAMES ${GRPC_GRPC_LIB_NAMES} - PATHS ${gRPC_ROOT} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_GRPCPP_LIB - NAMES ${GRPC_GRPCPP_LIB_NAMES} - PATHS ${gRPC_ROOT} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_ADDRESS_SORTING_LIB - NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES} - PATHS ${gRPC_ROOT} - PATH_SUFFIXES 
${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_UPB_LIB - NAMES ${GRPC_UPB_LIB_NAMES} - PATHS ${gRPC_ROOT} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin NO_DEFAULT_PATH - PATHS ${gRPC_ROOT} + list(GET GRPCPP_LINK_LIBRARIES 0 GRPCPP_IMPORTED_LOCATION) + list(REMOVE_AT GRPCPP_LINK_LIBRARIES 0) + find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin + HINTS ${GRPCPP_PC_PREFIX} + NO_DEFAULT_PATH PATH_SUFFIXES "bin") - find_path(GRPC_INCLUDE_DIR - NAMES grpc/grpc.h - PATHS ${gRPC_ROOT} - NO_DEFAULT_PATH - PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -else() - find_package(PkgConfig QUIET) - pkg_check_modules(GRPC_PC grpc++) - if(GRPC_PC_FOUND) - set(GRPC_ALT_VERSION "${GRPC_PC_VERSION}") - set(GRPC_INCLUDE_DIR "${GRPC_PC_INCLUDEDIR}") - list(APPEND GRPC_PC_LIBRARY_DIRS "${GRPC_PC_LIBDIR}") - message(STATUS "${GRPC_PC_LIBRARY_DIRS}") - - find_library(GRPC_GPR_LIB - NAMES ${GRPC_GPR_LIB_NAMES} - PATHS ${GRPC_PC_LIBRARY_DIRS} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_GRPC_LIB - NAMES ${GRPC_GRPC_LIB_NAMES} - PATHS ${GRPC_PC_LIBRARY_DIRS} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_GRPCPP_LIB - NAMES ${GRPC_GRPCPP_LIB_NAMES} - PATHS ${GRPC_PC_LIBRARY_DIRS} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_ADDRESS_SORTING_LIB - NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES} - PATHS ${GRPC_PC_LIBRARY_DIRS} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_library(GRPC_UPB_LIB - NAMES ${GRPC_UPB_LIB_NAMES} - PATHS ${GRPC_PC_LIBRARY_DIRS} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin - HINTS ${GRPC_PC_PREFIX} - NO_DEFAULT_PATH - PATH_SUFFIXES "bin") - else() - find_library(GRPC_GPR_LIB - NAMES ${GRPC_GPR_LIB_NAMES} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_library(GRPC_GRPC_LIB - 
NAMES ${GRPC_GRPC_LIB_NAMES} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_library(GRPC_GRPCPP_LIB - NAMES ${GRPC_GRPCPP_LIB_NAMES} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_library(GRPC_ADDRESS_SORTING_LIB - NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_library(GRPC_UPB_LIB - NAMES ${GRPC_UPB_LIB_NAMES} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin PATH_SUFFIXES "bin") - find_path(GRPC_INCLUDE_DIR - NAMES grpc/grpc.h - PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + set(gRPCAlt_FIND_PACKAGE_ARGS gRPCAlt REQUIRED_VARS GRPCPP_IMPORTED_LOCATION + GRPC_CPP_PLUGIN) + if(gRPCAlt_VERSION) + list(APPEND gRPCAlt_FIND_PACKAGE_ARGS VERSION_VAR gRPCAlt_VERSION) endif() + find_package_handle_standard_args(${gRPCAlt_FIND_PACKAGE_ARGS}) +else() + set(gRPCAlt_FOUND FALSE) endif() -set(GRPC_ALT_FIND_PACKAGE_ARGS - gRPCAlt - REQUIRED_VARS - GRPC_INCLUDE_DIR - GRPC_GPR_LIB - GRPC_GRPC_LIB - GRPC_GRPCPP_LIB - GRPC_CPP_PLUGIN) -if(GRPC_ALT_VERSION) - list(APPEND GRPC_ALT_FIND_PACKAGE_ARGS VERSION_VAR GRPC_ALT_VERSION) -endif() -find_package_handle_standard_args(${GRPC_ALT_FIND_PACKAGE_ARGS}) - if(gRPCAlt_FOUND) - add_library(gRPC::gpr UNKNOWN IMPORTED) - set_target_properties(gRPC::gpr - PROPERTIES IMPORTED_LOCATION "${GRPC_GPR_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") - - add_library(gRPC::grpc UNKNOWN IMPORTED) - set_target_properties( - gRPC::grpc - PROPERTIES IMPORTED_LOCATION - "${GRPC_GRPC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES - "${GRPC_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES - "OpenSSL::SSL;OpenSSL::Crypto;ZLIB::ZLIB;c-ares::cares") - - set(_GRPCPP_LINK_LIBRARIES "gRPC::grpc;gRPC::gpr") - - if(GRPC_ADDRESS_SORTING_LIB) - # Address sorting is optional and not always required. 
- add_library(gRPC::address_sorting UNKNOWN IMPORTED) - set_target_properties(gRPC::address_sorting - PROPERTIES IMPORTED_LOCATION "${GRPC_ADDRESS_SORTING_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") - set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};gRPC::address_sorting") - endif() - - if(GRPC_UPB_LIB) - # upb is used by recent gRPC versions - add_library(gRPC::upb UNKNOWN IMPORTED) - set_target_properties(gRPC::upb - PROPERTIES IMPORTED_LOCATION "${GRPC_UPB_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") - set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};gRPC::upb") - endif() - - find_package(absl CONFIG) - if(absl_FOUND) - # Abseil libraries that recent gRPC versions depend on - set(_ABSL_LIBS - bad_optional_access - int128 - raw_logging_internal - str_format_internal - strings - throw_delegate - time - time_zone) - - foreach(_ABSL_LIB ${_ABSL_LIBS}) - set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};absl::${_ABSL_LIB}") - endforeach() - endif() - add_library(gRPC::grpc++ UNKNOWN IMPORTED) set_target_properties(gRPC::grpc++ - PROPERTIES IMPORTED_LOCATION - "${GRPC_GRPCPP_LIB}" - INTERFACE_LINK_LIBRARIES - "${_GRPCPP_LINK_LIBRARIES}" + PROPERTIES IMPORTED_LOCATION "${GRPCPP_IMPORTED_LOCATION}" + INTERFACE_COMPILE_OPTIONS "${GRPCPP_COMPILE_OPTIONS}" INTERFACE_INCLUDE_DIRECTORIES - "${GRPC_INCLUDE_DIR}") + "${GRPCPP_INCLUDE_DIRECTORIES}" + INTERFACE_LINK_LIBRARIES "${GRPCPP_LINK_LIBRARIES}" + INTERFACE_LINK_OPTIONS "${GRPCPP_LINK_OPTIONS}") add_executable(gRPC::grpc_cpp_plugin IMPORTED) - set_target_properties(gRPC::grpc_cpp_plugin - PROPERTIES IMPORTED_LOCATION ${GRPC_CPP_PLUGIN}) + set_target_properties(gRPC::grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION + ${GRPC_CPP_PLUGIN}) endif() diff --git a/cpp/cmake_modules/Findre2Alt.cmake b/cpp/cmake_modules/Findre2Alt.cmake index 93b69ce77cb..68abf1b75fe 100644 --- a/cpp/cmake_modules/Findre2Alt.cmake +++ b/cpp/cmake_modules/Findre2Alt.cmake @@ -42,35 +42,37 @@ 
if(RE2_PC_FOUND) # On Fedora, the reported prefix is wrong. As users likely run into this, # workaround. # https://bugzilla.redhat.com/show_bug.cgi?id=1652589 - if(UNIX AND NOT APPLE AND NOT RE2_LIB) + if(UNIX + AND NOT APPLE + AND NOT RE2_LIB) if(RE2_PC_PREFIX STREQUAL "/usr/local") find_library(RE2_LIB re2) endif() endif() elseif(RE2_ROOT) - find_library( - RE2_LIB - NAMES - re2_static re2 - "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}" - PATHS ${RE2_ROOT} - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} - NO_DEFAULT_PATH) + find_library(RE2_LIB + NAMES re2_static + re2 + "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATHS ${RE2_ROOT} + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} + NO_DEFAULT_PATH) find_path(RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_ROOT} NO_DEFAULT_PATH PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) else() - find_library( - RE2_LIB - NAMES - re2_static re2 - "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}" - PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_path(RE2_INCLUDE_DIR NAMES re2/re2.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + find_library(RE2_LIB + NAMES re2_static + re2 + "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) + find_path(RE2_INCLUDE_DIR + NAMES re2/re2.h + PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) endif() find_package_handle_standard_args(re2Alt REQUIRED_VARS RE2_LIB RE2_INCLUDE_DIR) diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake index edea73b8dae..4d732f18694 100644 
--- a/cpp/cmake_modules/Findutf8proc.cmake +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -15,14 +15,41 @@ # specific language governing permissions and limitations # under the License. +function(extract_utf8proc_version) + if(utf8proc_INCLUDE_DIR) + file(READ "${utf8proc_INCLUDE_DIR}/utf8proc.h" UTF8PROC_H_CONTENT) + + string(REGEX MATCH "#define UTF8PROC_VERSION_MAJOR [0-9]+" + UTF8PROC_MAJOR_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}") + string(REGEX MATCH "#define UTF8PROC_VERSION_MINOR [0-9]+" + UTF8PROC_MINOR_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}") + string(REGEX MATCH "#define UTF8PROC_VERSION_PATCH [0-9]+" + UTF8PROC_PATCH_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}") + + string(REGEX MATCH "[0-9]+$" UTF8PROC_MAJOR_VERSION + "${UTF8PROC_MAJOR_VERSION_DEFINITION}") + string(REGEX MATCH "[0-9]+$" UTF8PROC_MINOR_VERSION + "${UTF8PROC_MINOR_VERSION_DEFINITION}") + string(REGEX MATCH "[0-9]+$" UTF8PROC_PATCH_VERSION + "${UTF8PROC_PATCH_VERSION_DEFINITION}") + set(utf8proc_VERSION + "${UTF8PROC_MAJOR_VERSION}.${UTF8PROC_MINOR_VERSION}.${UTF8PROC_PATCH_VERSION}" + PARENT_SCOPE) + else() + set(utf8proc_VERSION + "" + PARENT_SCOPE) + endif() +endfunction(extract_utf8proc_version) + if(ARROW_UTF8PROC_USE_SHARED) set(utf8proc_LIB_NAMES) if(CMAKE_IMPORT_LIBRARY_SUFFIX) list(APPEND utf8proc_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}utf8proc${CMAKE_IMPORT_LIBRARY_SUFFIX}") + "${CMAKE_IMPORT_LIBRARY_PREFIX}utf8proc${CMAKE_IMPORT_LIBRARY_SUFFIX}") endif() list(APPEND utf8proc_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}") + "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}") else() if(MSVC AND NOT DEFINED utf8proc_MSVC_STATIC_LIB_SUFFIX) set(utf8proc_MSVC_STATIC_LIB_SUFFIX "_static") @@ -44,6 +71,7 @@ if(utf8proc_ROOT) PATHS ${utf8proc_ROOT} NO_DEFAULT_PATH PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + extract_utf8proc_version() else() find_library(utf8proc_LIB NAMES ${utf8proc_LIB_NAMES} @@ -51,20 +79,23 @@ 
else() find_path(utf8proc_INCLUDE_DIR NAMES utf8proc.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + extract_utf8proc_version() endif() -find_package_handle_standard_args(utf8proc REQUIRED_VARS utf8proc_LIB - utf8proc_INCLUDE_DIR) +find_package_handle_standard_args( + utf8proc + REQUIRED_VARS utf8proc_LIB utf8proc_INCLUDE_DIR + VERSION_VAR utf8proc_VERSION) if(utf8proc_FOUND) set(utf8proc_FOUND TRUE) add_library(utf8proc::utf8proc UNKNOWN IMPORTED) - set_target_properties( - utf8proc::utf8proc - PROPERTIES IMPORTED_LOCATION "${utf8proc_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${utf8proc_INCLUDE_DIR}") + set_target_properties(utf8proc::utf8proc + PROPERTIES IMPORTED_LOCATION "${utf8proc_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${utf8proc_INCLUDE_DIR}") if(NOT ARROW_UTF8PROC_USE_SHARED) - set_target_properties(utf8proc::utf8proc - PROPERTIES INTERFACE_COMPILER_DEFINITIONS "UTF8PROC_STATIC") + set_target_properties(utf8proc::utf8proc PROPERTIES INTERFACE_COMPILER_DEFINITIONS + "UTF8PROC_STATIC") endif() endif() diff --git a/cpp/cmake_modules/Findzstd.cmake b/cpp/cmake_modules/Findzstd.cmake index f32892aecb8..3fc14ec0d72 100644 --- a/cpp/cmake_modules/Findzstd.cmake +++ b/cpp/cmake_modules/Findzstd.cmake @@ -23,16 +23,14 @@ set(ZSTD_LIB_NAME_BASE "${ZSTD_MSVC_LIB_PREFIX}zstd") if(ARROW_ZSTD_USE_SHARED) set(ZSTD_LIB_NAMES) if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list( - APPEND - ZSTD_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) + list(APPEND + ZSTD_LIB_NAMES + "${CMAKE_IMPORT_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) endif() - list( - APPEND - ZSTD_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}") + list(APPEND ZSTD_LIB_NAMES + "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) else() if(MSVC AND NOT DEFINED ZSTD_MSVC_STATIC_LIB_SUFFIX) set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static") @@ -43,7 +41,7 @@ else() 
"${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${ZSTD_STATIC_LIB_SUFFIX}") endif() -# First, find via if specified ZTD_ROOT +# First, find via if specified ZSTD_ROOT if(ZSTD_ROOT) message(STATUS "Using ZSTD_ROOT: ${ZSTD_ROOT}") find_library(ZSTD_LIB @@ -75,7 +73,9 @@ else() find_library(ZSTD_LIB NAMES ${ZSTD_LIB_NAMES} PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) - find_path(ZSTD_INCLUDE_DIR NAMES zstd.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) + find_path(ZSTD_INCLUDE_DIR + NAMES zstd.h + PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) endif() endif() diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 9f68c560472..86c6e9706e0 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -28,7 +28,7 @@ if(NOT DEFINED ARROW_CPU_FLAG) set(ARROW_CPU_FLAG "armv8") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "armv7") set(ARROW_CPU_FLAG "armv7") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc|ppc") set(ARROW_CPU_FLAG "ppc") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(ARROW_CPU_FLAG "s390x") @@ -76,12 +76,13 @@ if(ARROW_CPU_FLAG STREQUAL "x86") char out[32]; _mm512_storeu_si512(out, mask); return 0; - }" CXX_SUPPORTS_AVX512) + }" + CXX_SUPPORTS_AVX512) set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS}) endif() # Runtime SIMD level it can get from compiler and ARROW_RUNTIME_SIMD_LEVEL - if(CXX_SUPPORTS_SSE4_2 - AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SSE4_2|AVX2|AVX512|MAX)$") + if(CXX_SUPPORTS_SSE4_2 AND ARROW_RUNTIME_SIMD_LEVEL MATCHES + "^(SSE4_2|AVX2|AVX512|MAX)$") set(ARROW_HAVE_RUNTIME_SSE4_2 ON) add_definitions(-DARROW_HAVE_RUNTIME_SSE4_2) endif() @@ -252,30 +253,26 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4365") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4267") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4838") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR 
CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-variable") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(WIN32) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-deprecated") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-unused-variable") else() set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-variable") endif() else() message(FATAL_ERROR "${UNKNOWN_COMPILER_MESSAGE}") @@ -289,8 +286,8 @@ elseif("${BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall") # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level # /wdnnnn disables a warning where "nnnn" is a warning number - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} 
-Wno-c++98-compat") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-c++98-compat-pedantic") @@ -344,9 +341,10 @@ if(MSVC) # Disable "switch statement contains 'default' but no 'case' labels" warning # (required for protobuf, see https://github.com/protocolbuffers/protobuf/issues/6885) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4065") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" - OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7.0") + if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" OR CMAKE_CXX_COMPILER_VERSION + VERSION_GREATER "7.0") # Without this, gcc >= 7 warns related to changes in C++17 set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -Wno-noexcept-type") endif() @@ -373,8 +371,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -Wno-subobject-linkage") endif() -elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") # Clang options for all builds # Using Clang with ccache causes a bunch of spurious warnings that are @@ -385,7 +383,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qunused-arguments") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") - # Avoid clang error when an unknown warning flag is passed + # Avoid error when an unknown warning flag is passed set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option") # Add colors when paired with ninja set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics") @@ -443,28 +441,31 @@ if(ARROW_CPU_FLAG STREQUAL "ppc") endif() if(ARROW_CPU_FLAG STREQUAL "armv8") - if(NOT CXX_SUPPORTS_ARMV8_ARCH) - message(FATAL_ERROR "Unsupported arch flag: ${ARROW_ARMV8_ARCH_FLAG}.") - endif() - if(ARROW_ARMV8_ARCH_FLAG MATCHES "native") - message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.") - endif() - 
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_ARMV8_ARCH_FLAG}") - if(NOT ARROW_SIMD_LEVEL STREQUAL "NONE") - add_definitions(-DARROW_HAVE_NEON) - endif() + set(ARROW_HAVE_NEON ON) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4") - message(WARNING "Disable Armv8 CRC and Crypto as compiler doesn't support them well.") - else() - if(ARROW_ARMV8_ARCH_FLAG MATCHES "\\+crypto") - add_definitions(-DARROW_HAVE_ARMV8_CRYPTO) + if(NOT CXX_SUPPORTS_ARMV8_ARCH) + message(FATAL_ERROR "Unsupported arch flag: ${ARROW_ARMV8_ARCH_FLAG}.") + endif() + if(ARROW_ARMV8_ARCH_FLAG MATCHES "native") + message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.") endif() - # armv8.1+ implies crc support - if(ARROW_ARMV8_ARCH_FLAG MATCHES "armv8\\.[1-9]|\\+crc") - add_definitions(-DARROW_HAVE_ARMV8_CRC) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_ARMV8_ARCH_FLAG}") + + add_definitions(-DARROW_HAVE_NEON) + + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS + "5.4") + message(WARNING "Disable Armv8 CRC and Crypto as compiler doesn't support them well." + ) + else() + if(ARROW_ARMV8_ARCH_FLAG MATCHES "\\+crypto") + add_definitions(-DARROW_HAVE_ARMV8_CRYPTO) + endif() + # armv8.1+ implies crc support + if(ARROW_ARMV8_ARCH_FLAG MATCHES "armv8\\.[1-9]|\\+crc") + add_definitions(-DARROW_HAVE_ARMV8_CRC) + endif() endif() endif() endif() @@ -493,7 +494,9 @@ function(GET_GOLD_VERSION) message(SEND_ERROR "Could not extract GNU gold version. " "Linker version output: ${LINKER_OUTPUT}") endif() - set(GOLD_VERSION "${CMAKE_MATCH_1}" PARENT_SCOPE) + set(GOLD_VERSION + "${CMAKE_MATCH_1}" + PARENT_SCOPE) endif() endfunction() @@ -590,9 +593,8 @@ set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate") set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use") # Set compile flags based on the build type. 
-message( - "Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})" - ) +message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})" +) if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9f240e448f6..0631d277b08 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -196,6 +196,7 @@ endmacro() macro(resolve_dependency DEPENDENCY_NAME) set(options) set(one_value_args HAVE_ALT IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) + set(multi_value_args PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -236,6 +237,17 @@ macro(resolve_dependency DEPENDENCY_NAME) if(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM" AND ARG_IS_RUNTIME_DEPENDENCY) provide_find_module(${PACKAGE_NAME}) list(APPEND ARROW_SYSTEM_DEPENDENCIES ${PACKAGE_NAME}) + find_package(PkgConfig QUIET) + foreach(ARG_PC_PACKAGE_NAME ${ARG_PC_PACKAGE_NAMES}) + pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC + ${ARG_PC_PACKAGE_NAME} + NO_CMAKE_PATH + NO_CMAKE_ENVIRONMENT_PATH + QUIET) + if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND}) + string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}") + endif() + endforeach() endif() endmacro() @@ -279,7 +291,9 @@ if(ARROW_JSON) set(ARROW_WITH_RAPIDJSON ON) endif() -if(ARROW_ORC OR ARROW_FLIGHT OR ARROW_GANDIVA) +if(ARROW_ORC + OR ARROW_FLIGHT + OR ARROW_GANDIVA) set(ARROW_WITH_PROTOBUF ON) endif() @@ -287,11 +301,13 @@ if(ARROW_S3) set(ARROW_WITH_ZLIB ON) endif() -if(NOT ARROW_COMPUTE) - # utf8proc is only potentially used in kernels for now +if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA)) set(ARROW_WITH_UTF8PROC OFF) endif() -if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA) AND (NOT 
ARROW_WITH_GRPC)) + +if((NOT ARROW_COMPUTE) + AND (NOT ARROW_GANDIVA) + AND (NOT ARROW_WITH_GRPC)) set(ARROW_WITH_RE2 OFF) endif() @@ -313,9 +329,8 @@ endmacro() file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT) foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) # Exclude comments - if(NOT - ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=") - OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM="))) + if(NOT ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=") + OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM="))) continue() endif() @@ -336,46 +351,42 @@ endforeach() if(DEFINED ENV{ARROW_ABSL_URL}) set(ABSL_SOURCE_URL "$ENV{ARROW_ABSL_URL}") else() - set_urls( - ABSL_SOURCE_URL - "https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz") + set_urls(ABSL_SOURCE_URL + "https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_AWS_C_COMMON_URL}) set(AWS_C_COMMON_SOURCE_URL "$ENV{ARROW_AWS_C_COMMON_URL}") else() - set_urls( - AWS_C_COMMON_SOURCE_URL - "https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz" - ) + set_urls(AWS_C_COMMON_SOURCE_URL + "https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_AWS_CHECKSUMS_URL}) set(AWS_CHECKSUMS_SOURCE_URL "$ENV{ARROW_AWS_CHECKSUMS_URL}") else() - set_urls( - AWS_CHECKSUMS_SOURCE_URL - "https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz" - ) + set_urls(AWS_CHECKSUMS_SOURCE_URL + "https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_AWS_C_EVENT_STREAM_URL}) set(AWS_C_EVENT_STREAM_SOURCE_URL "$ENV{ARROW_AWS_C_EVENT_STREAM_URL}") else() - set_urls( - AWS_C_EVENT_STREAM_SOURCE_URL - "https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz" - ) 
+ set_urls(AWS_C_EVENT_STREAM_SOURCE_URL + "https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_AWSSDK_URL}) set(AWSSDK_SOURCE_URL "$ENV{ARROW_AWSSDK_URL}") else() - set_urls( - AWSSDK_SOURCE_URL - "https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz" - ) + set_urls(AWSSDK_SOURCE_URL + "https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_BOOST_URL}) @@ -383,136 +394,125 @@ if(DEFINED ENV{ARROW_BOOST_URL}) else() string(REPLACE "." "_" ARROW_BOOST_BUILD_VERSION_UNDERSCORES ${ARROW_BOOST_BUILD_VERSION}) - set_urls( - BOOST_SOURCE_URL - # These are trimmed boost bundles we maintain. - # See cpp/build-support/trim-boost.sh - # FIXME(ARROW-6407) automate uploading this archive to ensure it reflects - # our currently used packages and doesn't fall out of sync with - # ${ARROW_BOOST_BUILD_VERSION_UNDERSCORES} - "https://github.com/ursa-labs/thirdparty/releases/download/latest/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz" - "https://sourceforge.net/projects/boost/files/boost/${ARROW_BOOST_BUILD_VERSION}/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz" - "https://github.com/boostorg/boost/archive/boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz") + set_urls(BOOST_SOURCE_URL + # These are trimmed boost bundles we maintain. 
+ # See cpp/build-support/trim-boost.sh + # FIXME(ARROW-6407) automate uploading this archive to ensure it reflects + # our currently used packages and doesn't fall out of sync with + # ${ARROW_BOOST_BUILD_VERSION_UNDERSCORES} + "https://github.com/ursa-labs/thirdparty/releases/download/latest/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz" + "https://sourceforge.net/projects/boost/files/boost/${ARROW_BOOST_BUILD_VERSION}/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz" + "https://github.com/boostorg/boost/archive/boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_BROTLI_URL}) set(BROTLI_SOURCE_URL "$ENV{ARROW_BROTLI_URL}") else() - set_urls( - BROTLI_SOURCE_URL - "https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz" - ) + set_urls(BROTLI_SOURCE_URL + "https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_BZIP2_URL}) set(ARROW_BZIP2_SOURCE_URL "$ENV{ARROW_BZIP2_URL}") else() - set_urls( - ARROW_BZIP2_SOURCE_URL - "https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" - ) + set_urls(ARROW_BZIP2_SOURCE_URL + "https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_CARES_URL}) set(CARES_SOURCE_URL "$ENV{ARROW_CARES_URL}") else() - set_urls( - CARES_SOURCE_URL - "https://c-ares.haxx.se/download/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz" - 
"https://github.com/ursa-labs/thirdparty/releases/download/latest/cares-${ARROW_CARES_BUILD_VERSION}.tar.gz" - ) + set_urls(CARES_SOURCE_URL + "https://c-ares.haxx.se/download/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/cares-${ARROW_CARES_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_GBENCHMARK_URL}) set(GBENCHMARK_SOURCE_URL "$ENV{ARROW_GBENCHMARK_URL}") else() - set_urls( - GBENCHMARK_SOURCE_URL - "https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" - ) + set_urls(GBENCHMARK_SOURCE_URL + "https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_GFLAGS_URL}) set(GFLAGS_SOURCE_URL "$ENV{ARROW_GFLAGS_URL}") else() - set_urls( - GFLAGS_SOURCE_URL - "https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" - ) + set_urls(GFLAGS_SOURCE_URL + "https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_GLOG_URL}) set(GLOG_SOURCE_URL "$ENV{ARROW_GLOG_URL}") else() - set_urls( - GLOG_SOURCE_URL - "https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz" - ) + set_urls(GLOG_SOURCE_URL + "https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" + 
"https://github.com/ursa-labs/thirdparty/releases/download/latest/glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_GRPC_URL}) set(GRPC_SOURCE_URL "$ENV{ARROW_GRPC_URL}") else() - set_urls( - GRPC_SOURCE_URL - "https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz" - ) + set_urls(GRPC_SOURCE_URL + "https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}") else() - set_urls( - GTEST_SOURCE_URL - "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - ) + set_urls(GTEST_SOURCE_URL + "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_JEMALLOC_URL}) set(JEMALLOC_SOURCE_URL "$ENV{ARROW_JEMALLOC_URL}") else() - set_urls( - JEMALLOC_SOURCE_URL - "https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" - ) + set_urls(JEMALLOC_SOURCE_URL + 
"https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" + ) endif() if(DEFINED ENV{ARROW_MIMALLOC_URL}) set(MIMALLOC_SOURCE_URL "$ENV{ARROW_MIMALLOC_URL}") else() - set_urls( - MIMALLOC_SOURCE_URL - "https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" - ) + set_urls(MIMALLOC_SOURCE_URL + "https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_LZ4_URL}) set(LZ4_SOURCE_URL "$ENV{ARROW_LZ4_URL}") else() - set_urls( - LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz" - ) + set_urls(LZ4_SOURCE_URL + "https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_ORC_URL}) set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}") else() - set_urls( - ORC_SOURCE_URL - "https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" - ) + set_urls(ORC_SOURCE_URL + "https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_PROTOBUF_URL}) @@ -521,109 +521,101 @@ else() string(SUBSTRING 
${ARROW_PROTOBUF_BUILD_VERSION} 1 -1 ARROW_PROTOBUF_STRIPPED_BUILD_VERSION) # strip the leading `v` - set_urls( - PROTOBUF_SOURCE_URL - "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz" - ) + set_urls(PROTOBUF_SOURCE_URL + "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_RE2_URL}) set(RE2_SOURCE_URL "$ENV{ARROW_RE2_URL}") else() - set_urls( - RE2_SOURCE_URL - "https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/re2-${ARROW_RE2_BUILD_VERSION}.tar.gz" - ) + set_urls(RE2_SOURCE_URL + "https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/re2-${ARROW_RE2_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_RAPIDJSON_URL}) set(RAPIDJSON_SOURCE_URL "$ENV{ARROW_RAPIDJSON_URL}") else() - set_urls( - RAPIDJSON_SOURCE_URL - "https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" - ) + set_urls(RAPIDJSON_SOURCE_URL + "https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_SNAPPY_URL}) set(SNAPPY_SOURCE_URL "$ENV{ARROW_SNAPPY_URL}") else() - set_urls( - SNAPPY_SOURCE_URL - 
"https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" - ) + set_urls(SNAPPY_SOURCE_URL + "https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_THRIFT_URL}) set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}") else() - set_urls( - THRIFT_SOURCE_URL - "http://www.apache.org/dyn/closer.cgi?action=download&filename=/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://downloads.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://github.com/apache/thrift/archive/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://apache.claz.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://apache.cs.utah.edu/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://apache.mirrors.lucidnetworks.net/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://apache.osuosl.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://ftp.wayne.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://mirror.olnevhost.net/pub/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://mirrors.gigenet.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://mirrors.koehn.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://mirrors.ocf.berkeley.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - 
"https://mirrors.sonic.net/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://us.mirrors.quenda.co/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - ) + set_urls(THRIFT_SOURCE_URL + "http://www.apache.org/dyn/closer.cgi?action=download&filename=/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://downloads.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://github.com/apache/thrift/archive/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://apache.claz.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://apache.cs.utah.edu/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://apache.mirrors.lucidnetworks.net/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://apache.osuosl.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://ftp.wayne.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://mirror.olnevhost.net/pub/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://mirrors.gigenet.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://mirrors.koehn.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://mirrors.ocf.berkeley.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://mirrors.sonic.net/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + 
"https://us.mirrors.quenda.co/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_UTF8PROC_URL}) set(ARROW_UTF8PROC_SOURCE_URL "$ENV{ARROW_UTF8PROC_URL}") else() - set_urls( - ARROW_UTF8PROC_SOURCE_URL - "https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" - ) + set_urls(ARROW_UTF8PROC_SOURCE_URL + "https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_XSIMD_URL}) set(XSIMD_SOURCE_URL "$ENV{ARROW_XSIMD_URL}") else() - set_urls( - XSIMD_SOURCE_URL - "https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz") + set_urls(XSIMD_SOURCE_URL + "https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_ZLIB_URL}) set(ZLIB_SOURCE_URL "$ENV{ARROW_ZLIB_URL}") else() - set_urls( - ZLIB_SOURCE_URL "https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" - ) + set_urls(ZLIB_SOURCE_URL + "https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" + ) endif() if(DEFINED ENV{ARROW_ZSTD_URL}) set(ZSTD_SOURCE_URL "$ENV{ARROW_ZSTD_URL}") else() - set_urls( - ZSTD_SOURCE_URL - "https://github.com/facebook/zstd/archive/${ARROW_ZSTD_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz" - ) + set_urls(ZSTD_SOURCE_URL + "https://github.com/facebook/zstd/archive/${ARROW_ZSTD_BUILD_VERSION}.tar.gz" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz" + ) endif() # 
---------------------------------------------------------------------- # ExternalProject options -set( - EP_CXX_FLAGS - "${CMAKE_CXX_COMPILER_ARG1} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}" - ) +set(EP_CXX_FLAGS + "${CMAKE_CXX_COMPILER_ARG1} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}" +) set(EP_C_FLAGS "${CMAKE_C_COMPILER_ARG1} ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") @@ -730,12 +722,12 @@ macro(build_boost) set(BOOST_BUILD_WITH_LIBRARIES "filesystem" "system") string(REPLACE ";" "," BOOST_CONFIGURE_LIBRARIES "${BOOST_BUILD_WITH_LIBRARIES}") list(APPEND BOOST_CONFIGURE_COMMAND "--prefix=${BOOST_PREFIX}" - "--with-libraries=${BOOST_CONFIGURE_LIBRARIES}") + "--with-libraries=${BOOST_CONFIGURE_LIBRARIES}") set(BOOST_BUILD_COMMAND "./b2" "-j${NPROC}" "link=${BOOST_BUILD_LINK}" "variant=${BOOST_BUILD_VARIANT}") if(MSVC) - string(REGEX - REPLACE "([0-9])$" ".\\1" BOOST_TOOLSET_MSVC_VERSION ${MSVC_TOOLSET_VERSION}) + string(REGEX REPLACE "([0-9])$" ".\\1" BOOST_TOOLSET_MSVC_VERSION + ${MSVC_TOOLSET_VERSION}) list(APPEND BOOST_BUILD_COMMAND "toolset=msvc-${BOOST_TOOLSET_MSVC_VERSION}") set(BOOST_BUILD_WITH_LIBRARIES_MSVC) foreach(_BOOST_LIB ${BOOST_BUILD_WITH_LIBRARIES}) @@ -760,14 +752,12 @@ macro(build_boost) else() set(BOOST_LIBRARY_SUFFIX "") endif() - set( - BOOST_STATIC_SYSTEM_LIBRARY - "${BOOST_LIB_DIR}/libboost_system${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - BOOST_STATIC_FILESYSTEM_LIBRARY - "${BOOST_LIB_DIR}/libboost_filesystem${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(BOOST_STATIC_SYSTEM_LIBRARY + "${BOOST_LIB_DIR}/libboost_system${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(BOOST_STATIC_FILESYSTEM_LIBRARY + "${BOOST_LIB_DIR}/libboost_filesystem${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(BOOST_SYSTEM_LIBRARY boost_system_static) set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) set(BOOST_BUILD_PRODUCTS 
${BOOST_STATIC_SYSTEM_LIBRARY} @@ -925,14 +915,13 @@ macro(build_snappy) message(STATUS "Building snappy from source") set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install") set(SNAPPY_STATIC_LIB_NAME snappy) - set( - SNAPPY_STATIC_LIB - "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(SNAPPY_STATIC_LIB + "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) - set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib - -DSNAPPY_BUILD_TESTS=OFF - "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + set(SNAPPY_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib -DSNAPPY_BUILD_TESTS=OFF + "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") externalproject_add(snappy_ep ${EP_LOG_OPTIONS} @@ -956,7 +945,11 @@ macro(build_snappy) endmacro() if(ARROW_WITH_SNAPPY) - resolve_dependency(Snappy) + resolve_dependency(Snappy PC_PACKAGE_NAMES snappy) + if(${Snappy_SOURCE} STREQUAL "SYSTEM" AND NOT snappy_PC_FOUND) + get_target_property(SNAPPY_LIB Snappy::snappy IMPORTED_LOCATION) + string(APPEND ARROW_PC_LIBS_PRIVATE " ${SNAPPY_LIB}") + endif() # TODO: Don't use global includes but rather target_include_directories get_target_property(SNAPPY_INCLUDE_DIRS Snappy::snappy INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) @@ -970,18 +963,15 @@ macro(build_brotli) set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install") set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include") set(BROTLI_LIB_DIR lib) - set( - BROTLI_STATIC_LIBRARY_ENC - "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - BROTLI_STATIC_LIBRARY_DEC - "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - BROTLI_STATIC_LIBRARY_COMMON - 
"${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(BROTLI_STATIC_LIBRARY_ENC + "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(BROTLI_STATIC_LIBRARY_DEC + "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(BROTLI_STATIC_LIBRARY_COMMON + "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}" -DCMAKE_INSTALL_LIBDIR=${BROTLI_LIB_DIR}) @@ -1016,12 +1006,15 @@ macro(build_brotli) INTERFACE_INCLUDE_DIRECTORIES "${BROTLI_INCLUDE_DIR}") add_dependencies(Brotli::brotlidec brotli_ep) - list(APPEND ARROW_BUNDLED_STATIC_LIBS Brotli::brotlicommon Brotli::brotlienc - Brotli::brotlidec) + list(APPEND + ARROW_BUNDLED_STATIC_LIBS + Brotli::brotlicommon + Brotli::brotlienc + Brotli::brotlidec) endmacro() if(ARROW_WITH_BROTLI) - resolve_dependency(Brotli) + resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) # TODO: Don't use global includes but rather target_include_directories get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon INTERFACE_INCLUDE_DIRECTORIES) @@ -1049,7 +1042,9 @@ if(BREW_BIN AND NOT OPENSSL_ROOT_DIR) endif() set(ARROW_USE_OPENSSL OFF) -if(PARQUET_REQUIRE_ENCRYPTION OR ARROW_FLIGHT OR ARROW_S3) +if(PARQUET_REQUIRE_ENCRYPTION + OR ARROW_FLIGHT + OR ARROW_S3) # OpenSSL is required if(ARROW_OPENSSL_USE_SHARED) # Find shared OpenSSL libraries. @@ -1079,10 +1074,8 @@ if(ARROW_USE_OPENSSL) include_directories(SYSTEM ${OPENSSL_INCLUDE_DIR}) else() - message( - STATUS - "Building without OpenSSL support. Minimum OpenSSL version ${ARROW_OPENSSL_REQUIRED_VERSION} required." - ) + message(STATUS "Building without OpenSSL support. 
Minimum OpenSSL version ${ARROW_OPENSSL_REQUIRED_VERSION} required." + ) endif() # ---------------------------------------------------------------------- @@ -1097,15 +1090,14 @@ macro(build_glog) else() set(GLOG_LIB_SUFFIX "") endif() - set( - GLOG_STATIC_LIB - "${GLOG_BUILD_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${GLOG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(GLOG_STATIC_LIB + "${GLOG_BUILD_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${GLOG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC") - if(Threads::Threads) - set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread") - set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC -pthread") + if(CMAKE_THREAD_LIBS_INIT) + set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_THREAD_LIBS_INIT}") + set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} ${CMAKE_THREAD_LIBS_INIT}") endif() if(APPLE) @@ -1141,7 +1133,7 @@ macro(build_glog) endmacro() if(ARROW_USE_GLOG) - resolve_dependency(GLOG) + resolve_dependency(GLOG PC_PACKAGE_NAMES libglog) # TODO: Don't use global includes but rather target_include_directories get_target_property(GLOG_INCLUDE_DIR glog::glog INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${GLOG_INCLUDE_DIR}) @@ -1201,8 +1193,8 @@ macro(build_gflags) PROPERTIES INTERFACE_COMPILE_DEFINITIONS "GFLAGS_IS_A_DLL=0" INTERFACE_INCLUDE_DIRECTORIES "${GFLAGS_INCLUDE_DIR}") if(MSVC) - set_target_properties(${GFLAGS_LIBRARY} - PROPERTIES INTERFACE_LINK_LIBRARIES "shlwapi.lib") + set_target_properties(${GFLAGS_LIBRARY} PROPERTIES INTERFACE_LINK_LIBRARIES + "shlwapi.lib") endif() set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY}) @@ -1237,8 +1229,8 @@ endif() macro(build_thrift) if(CMAKE_VERSION VERSION_LESS 3.10) - message( - FATAL_ERROR "Building thrift using ExternalProject requires at least CMake 3.10") + message(FATAL_ERROR "Building thrift using ExternalProject requires at least CMake 3.10" + ) endif() message("Building 
Apache Thrift from source") set(THRIFT_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/thrift_ep-install") @@ -1316,7 +1308,11 @@ if(ARROW_WITH_THRIFT) # to build Boost, so don't look again if already found. if(NOT Thrift_FOUND AND NOT THRIFT_FOUND) # Thrift c++ code generated by 0.13 requires 0.11 or greater - resolve_dependency(Thrift REQUIRED_VERSION 0.11.0) + resolve_dependency(Thrift + REQUIRED_VERSION + 0.11.0 + PC_PACKAGE_NAMES + thrift) endif() # TODO: Don't use global includes but rather target_include_directories include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) @@ -1372,8 +1368,8 @@ macro(build_protobuf) if(ZLIB_ROOT) list(APPEND PROTOBUF_CMAKE_ARGS "-DZLIB_ROOT=${ZLIB_ROOT}") endif() - set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR - "cmake") + set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} + SOURCE_SUBDIR "cmake") endif() externalproject_add(protobuf_ep @@ -1386,18 +1382,18 @@ macro(build_protobuf) file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}") add_library(arrow::protobuf::libprotobuf STATIC IMPORTED) - set_target_properties( - arrow::protobuf::libprotobuf - PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${PROTOBUF_INCLUDE_DIR}") + set_target_properties(arrow::protobuf::libprotobuf + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${PROTOBUF_INCLUDE_DIR}") add_library(arrow::protobuf::libprotoc STATIC IMPORTED) - set_target_properties( - arrow::protobuf::libprotoc - PROPERTIES IMPORTED_LOCATION "${PROTOC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${PROTOBUF_INCLUDE_DIR}") + set_target_properties(arrow::protobuf::libprotoc + PROPERTIES IMPORTED_LOCATION "${PROTOC_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${PROTOBUF_INCLUDE_DIR}") add_executable(arrow::protobuf::protoc IMPORTED) - set_target_properties(arrow::protobuf::protoc - PROPERTIES IMPORTED_LOCATION "${PROTOBUF_COMPILER}") + 
set_target_properties(arrow::protobuf::protoc PROPERTIES IMPORTED_LOCATION + "${PROTOBUF_COMPILER}") add_dependencies(toolchain protobuf_ep) add_dependencies(arrow::protobuf::libprotobuf protobuf_ep) @@ -1416,7 +1412,11 @@ if(ARROW_WITH_PROTOBUF) else() set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") endif() - resolve_dependency(Protobuf REQUIRED_VERSION ${ARROW_PROTOBUF_REQUIRED_VERSION}) + resolve_dependency(Protobuf + REQUIRED_VERSION + ${ARROW_PROTOBUF_REQUIRED_VERSION} + PC_PACKAGE_NAMES + protobuf) if(ARROW_PROTOBUF_USE_SHARED AND MSVC_TOOLCHAIN) add_definitions(-DPROTOBUF_USE_DLLS) @@ -1463,8 +1463,8 @@ if(ARROW_WITH_PROTOBUF) else() if(NOT TARGET protobuf::protoc) add_executable(protobuf::protoc IMPORTED) - set_target_properties(protobuf::protoc - PROPERTIES IMPORTED_LOCATION "${PROTOBUF_PROTOC_EXECUTABLE}") + set_target_properties(protobuf::protoc PROPERTIES IMPORTED_LOCATION + "${PROTOBUF_PROTOC_EXECUTABLE}") endif() set(ARROW_PROTOBUF_PROTOC protobuf::protoc) endif() @@ -1495,37 +1495,40 @@ if(ARROW_JEMALLOC) set(ARROW_JEMALLOC_USE_SHARED OFF) set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") + set(JEMALLOC_LIB_DIR "${JEMALLOC_PREFIX}/lib") set(JEMALLOC_STATIC_LIB - "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}") + "${JEMALLOC_LIB_DIR}/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}") set(JEMALLOC_CONFIGURE_COMMAND ./configure "AR=${CMAKE_AR}" "CC=${CMAKE_C_COMPILER}") if(CMAKE_OSX_SYSROOT) list(APPEND JEMALLOC_CONFIGURE_COMMAND "SDKROOT=${CMAKE_OSX_SYSROOT}") endif() - list(APPEND JEMALLOC_CONFIGURE_COMMAND - "--prefix=${JEMALLOC_PREFIX}" - "--with-jemalloc-prefix=je_arrow_" - "--with-private-namespace=je_arrow_private_" - "--without-export" - "--disable-shared" - # Don't override operator new() - "--disable-cxx" "--disable-libdl" - # See https://github.com/jemalloc/jemalloc/issues/1237 - "--disable-initial-exec-tls" ${EP_LOG_OPTIONS}) + list(APPEND + JEMALLOC_CONFIGURE_COMMAND + 
"--prefix=${JEMALLOC_PREFIX}" + "--libdir=${JEMALLOC_LIB_DIR}" + "--with-jemalloc-prefix=je_arrow_" + "--with-private-namespace=je_arrow_private_" + "--without-export" + "--disable-shared" + # Don't override operator new() + "--disable-cxx" + "--disable-libdl" + # See https://github.com/jemalloc/jemalloc/issues/1237 + "--disable-initial-exec-tls" + ${EP_LOG_OPTIONS}) set(JEMALLOC_BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS}) if(CMAKE_OSX_SYSROOT) list(APPEND JEMALLOC_BUILD_COMMAND "SDKROOT=${CMAKE_OSX_SYSROOT}") endif() - externalproject_add( - jemalloc_ep - URL ${JEMALLOC_SOURCE_URL} - PATCH_COMMAND - touch doc/jemalloc.3 doc/jemalloc.html - # The prefix "je_arrow_" must be kept in sync with the value in memory_pool.cc - CONFIGURE_COMMAND ${JEMALLOC_CONFIGURE_COMMAND} - BUILD_IN_SOURCE 1 - BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND} - BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" - INSTALL_COMMAND ${MAKE} -j1 install) + externalproject_add(jemalloc_ep + URL ${JEMALLOC_SOURCE_URL} + PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html + # The prefix "je_arrow_" must be kept in sync with the value in memory_pool.cc + CONFIGURE_COMMAND ${JEMALLOC_CONFIGURE_COMMAND} + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND} + BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" + INSTALL_COMMAND ${MAKE} -j1 install) # Don't use the include directory directly so that we can point to a path # that is unique to our codebase. 
@@ -1534,10 +1537,8 @@ if(ARROW_JEMALLOC) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/") add_library(jemalloc::jemalloc STATIC IMPORTED) set_target_properties(jemalloc::jemalloc - PROPERTIES INTERFACE_LINK_LIBRARIES - Threads::Threads - IMPORTED_LOCATION - "${JEMALLOC_STATIC_LIB}" + PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads + IMPORTED_LOCATION "${JEMALLOC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src") add_dependencies(jemalloc::jemalloc jemalloc_ep) @@ -1561,11 +1562,10 @@ if(ARROW_MIMALLOC) endif() set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep") - set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/lib/mimalloc-2.0/include") - set( - MIMALLOC_STATIC_LIB - "${MIMALLOC_PREFIX}/lib/mimalloc-2.0/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include/mimalloc-1.7") + set(MIMALLOC_STATIC_LIB + "${MIMALLOC_PREFIX}/lib/mimalloc-1.7/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(MIMALLOC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} @@ -1586,10 +1586,8 @@ if(ARROW_MIMALLOC) add_library(mimalloc::mimalloc STATIC IMPORTED) set_target_properties(mimalloc::mimalloc - PROPERTIES INTERFACE_LINK_LIBRARIES - Threads::Threads - IMPORTED_LOCATION - "${MIMALLOC_STATIC_LIB}" + PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads + IMPORTED_LOCATION "${MIMALLOC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${MIMALLOC_INCLUDE_DIR}") add_dependencies(mimalloc::mimalloc mimalloc_ep) @@ -1641,10 +1639,9 @@ macro(build_gtest) "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_LIBRARY_SUFFIX}") set(GMOCK_SHARED_LIB "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_LIBRARY_SUFFIX}") - set( - GTEST_MAIN_SHARED_LIB - "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_LIBRARY_SUFFIX}" - ) + 
set(GTEST_MAIN_SHARED_LIB + "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_LIBRARY_SUFFIX}" + ) set(GTEST_INSTALL_NAME_DIR "$/lib") # Fix syntax highlighting mess introduced by unclosed bracket above set(dummy ">") @@ -1682,20 +1679,18 @@ macro(build_gtest) set(_GTEST_RUNTIME_DIR "${GTEST_PREFIX}/bin") set(_GTEST_RUNTIME_SUFFIX "${CMAKE_GTEST_DEBUG_EXTENSION}${CMAKE_SHARED_LIBRARY_SUFFIX}") - set( - _GTEST_RUNTIME_LIB - "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_RUNTIME_SUFFIX}") - set( - _GMOCK_RUNTIME_LIB - "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_RUNTIME_SUFFIX}") - set( - _GTEST_MAIN_RUNTIME_LIB - "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_RUNTIME_SUFFIX}" - ) + set(_GTEST_RUNTIME_LIB + "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_RUNTIME_SUFFIX}" + ) + set(_GMOCK_RUNTIME_LIB + "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_RUNTIME_SUFFIX}" + ) + set(_GTEST_MAIN_RUNTIME_LIB + "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_RUNTIME_SUFFIX}" + ) if(CMAKE_VERSION VERSION_LESS 3.9) - message( - FATAL_ERROR - "Building GoogleTest from source on Windows requires at least CMake 3.9") + message(FATAL_ERROR "Building GoogleTest from source on Windows requires at least CMake 3.9" + ) endif() get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) if(_GENERATOR_IS_MULTI_CONFIG) @@ -1706,20 +1701,11 @@ macro(build_gtest) externalproject_add_step(googletest_ep copy COMMAND ${CMAKE_COMMAND} -E make_directory ${_GTEST_RUNTIME_OUTPUT_DIR} - COMMAND ${CMAKE_COMMAND} - -E - copy - ${_GTEST_RUNTIME_LIB} + COMMAND ${CMAKE_COMMAND} -E copy ${_GTEST_RUNTIME_LIB} ${_GTEST_RUNTIME_OUTPUT_DIR} - COMMAND ${CMAKE_COMMAND} - -E - copy - ${_GMOCK_RUNTIME_LIB} + COMMAND ${CMAKE_COMMAND} -E copy ${_GMOCK_RUNTIME_LIB} ${_GTEST_RUNTIME_OUTPUT_DIR} - COMMAND ${CMAKE_COMMAND} - -E - copy - 
${_GTEST_MAIN_RUNTIME_LIB} + COMMAND ${CMAKE_COMMAND} -E copy ${_GTEST_MAIN_RUNTIME_LIB} ${_GTEST_RUNTIME_OUTPUT_DIR} DEPENDEES install) endif() @@ -1798,25 +1784,20 @@ macro(build_benchmark) set(GBENCHMARK_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -std=c++11") endif() - if(APPLE - AND (CMAKE_CXX_COMPILER_ID - STREQUAL - "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) + if(APPLE AND (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID + STREQUAL "Clang")) set(GBENCHMARK_CMAKE_CXX_FLAGS "${GBENCHMARK_CMAKE_CXX_FLAGS} -stdlib=libc++") endif() set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install") set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") - set( - GBENCHMARK_STATIC_LIB - "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - GBENCHMARK_MAIN_STATIC_LIB - "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark_main${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(GBENCHMARK_STATIC_LIB + "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(GBENCHMARK_MAIN_STATIC_LIB + "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark_main${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(GBENCHMARK_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${GBENCHMARK_PREFIX}" @@ -1856,19 +1837,11 @@ endmacro() if(ARROW_BUILD_BENCHMARKS) # ArgsProduct() is available since 1.5.2 set(BENCHMARK_REQUIRED_VERSION 1.5.2) - if("${ARROW_DEPENDENCY_SOURCE}" STREQUAL "CONDA" - AND "${benchmark_SOURCE}" STREQUAL "SYSTEM") - # TODO: Remove this workaround once - # https://github.com/google/benchmark/issues/1046 is resolved. - # - # benchmark doesn't set suitable version when we use released - # archive. So the benchmark package on conda-forge isn't report - # the real version. We accept all the benchmark package with - # conda. Conda users should install benchmark 1.5.2 or later by - # ci/conda_env_cpp.yml. 
- set(BENCHMARK_REQUIRED_VERSION 0.0.0) - endif() - resolve_dependency(benchmark REQUIRED_VERSION ${BENCHMARK_REQUIRED_VERSION}) + resolve_dependency(benchmark + REQUIRED_VERSION + ${BENCHMARK_REQUIRED_VERSION} + IS_RUNTIME_DEPENDENCY + FALSE) # TODO: Don't use global includes but rather target_include_directories get_target_property(BENCHMARK_INCLUDE_DIR benchmark::benchmark INTERFACE_INCLUDE_DIRECTORIES) @@ -1938,8 +1911,8 @@ macro(build_xsimd) set(XSIMD_VENDORED TRUE) endmacro() -# For now xsimd is always bundled from upstream -if(NOT ARROW_SIMD_LEVEL STREQUAL "NONE") +if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE" + )) set(xsimd_SOURCE "BUNDLED") resolve_dependency(xsimd) # TODO: Don't use global includes but rather target_include_directories @@ -1983,7 +1956,7 @@ macro(build_zlib) endmacro() if(ARROW_WITH_ZLIB) - resolve_dependency(ZLIB) + resolve_dependency(ZLIB PC_PACKAGE_NAMES zlib) # TODO: Don't use global includes but rather target_include_directories get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) @@ -2006,29 +1979,21 @@ macro(build_lz4) set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/liblz4_static.lib") set(LZ4_BUILD_COMMAND - BUILD_COMMAND - msbuild.exe - /m - /p:Configuration=${CMAKE_BUILD_TYPE} - /p:Platform=x64 - /p:PlatformToolset=v140 - ${LZ4_RUNTIME_LIBRARY_LINKAGE} - /t:Build + BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 + /p:PlatformToolset=v140 ${LZ4_RUNTIME_LIBRARY_LINKAGE} /t:Build ${LZ4_BUILD_DIR}/build/VS2010/lz4.sln) else() set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh - "AR=${CMAKE_AR}" "OS=${CMAKE_SYSTEM_NAME}") + set(LZ4_BUILD_COMMAND + BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh "AR=${CMAKE_AR}" + "OS=${CMAKE_SYSTEM_NAME}") endif() # We need to copy the header in lib to directory 
outside of the build externalproject_add(lz4_ep URL ${LZ4_SOURCE_URL} ${EP_LOG_OPTIONS} - UPDATE_COMMAND ${CMAKE_COMMAND} - -E - copy_directory - "${LZ4_BUILD_DIR}/lib" - "${LZ4_PREFIX}/include" + UPDATE_COMMAND ${CMAKE_COMMAND} -E copy_directory + "${LZ4_BUILD_DIR}/lib" "${LZ4_PREFIX}/include" ${LZ4_PATCH_COMMAND} CONFIGURE_COMMAND "" INSTALL_COMMAND "" @@ -2047,7 +2012,7 @@ macro(build_lz4) endmacro() if(ARROW_WITH_LZ4) - resolve_dependency(Lz4) + resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4) # TODO: Don't use global includes but rather target_include_directories get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) @@ -2111,7 +2076,12 @@ macro(build_zstd) endmacro() if(ARROW_WITH_ZSTD) - resolve_dependency(zstd) + # ARROW-13384: ZSTD_minCLevel was added in v1.4.0, required by ARROW-13091 + resolve_dependency(zstd + PC_PACKAGE_NAMES + libzstd + REQUIRED_VERSION + 1.4.0) if(TARGET zstd::libzstd) set(ARROW_ZSTD_LIBZSTD zstd::libzstd) @@ -2171,7 +2141,14 @@ macro(build_re2) endmacro() if(ARROW_WITH_RE2) + # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may + # include -std=c++11. It's not compatible with C source and C++ + # source not uses C++ 11. 
resolve_dependency(re2 HAVE_ALT TRUE) + if(${re2_SOURCE} STREQUAL "SYSTEM") + get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION) + string(APPEND ARROW_PC_LIBS_PRIVATE " ${RE2_LIB}") + endif() add_definitions(-DARROW_WITH_RE2) # TODO: Don't use global includes but rather target_include_directories @@ -2182,9 +2159,9 @@ endif() macro(build_bzip2) message(STATUS "Building BZip2 from source") set(BZIP2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-install") - set( - BZIP2_STATIC_LIB - "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BZIP2_STATIC_LIB + "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(BZIP2_EXTRA_ARGS "CC=${CMAKE_C_COMPILER}" "CFLAGS=${EP_C_FLAGS}") @@ -2206,10 +2183,10 @@ macro(build_bzip2) file(MAKE_DIRECTORY "${BZIP2_PREFIX}/include") add_library(BZip2::BZip2 STATIC IMPORTED) - set_target_properties( - BZip2::BZip2 - PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${BZIP2_PREFIX}/include") + set_target_properties(BZip2::BZip2 + PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${BZIP2_PREFIX}/include") set(BZIP2_INCLUDE_DIR "${BZIP2_PREFIX}/include") add_dependencies(toolchain bzip2_ep) @@ -2220,6 +2197,9 @@ endmacro() if(ARROW_WITH_BZ2) resolve_dependency(BZip2) + if(${BZip2_SOURCE} STREQUAL "SYSTEM") + string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZIP2_LIBRARIES}") + endif() if(NOT TARGET BZip2::BZip2) add_library(BZip2::BZip2 UNKNOWN IMPORTED) @@ -2236,10 +2216,9 @@ macro(build_utf8proc) if(MSVC) set(UTF8PROC_STATIC_LIB "${UTF8PROC_PREFIX}/lib/utf8proc_static.lib") else() - set( - UTF8PROC_STATIC_LIB - "${UTF8PROC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}utf8proc${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(UTF8PROC_STATIC_LIB + "${UTF8PROC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}utf8proc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) endif() set(UTF8PROC_CMAKE_ARGS @@ -2259,10 +2238,8 @@ 
macro(build_utf8proc) file(MAKE_DIRECTORY "${UTF8PROC_PREFIX}/include") add_library(utf8proc::utf8proc STATIC IMPORTED) set_target_properties(utf8proc::utf8proc - PROPERTIES IMPORTED_LOCATION - "${UTF8PROC_STATIC_LIB}" - INTERFACE_COMPILER_DEFINITIONS - "UTF8PROC_STATIC" + PROPERTIES IMPORTED_LOCATION "${UTF8PROC_STATIC_LIB}" + INTERFACE_COMPILER_DEFINITIONS "UTF8PROC_STATIC" INTERFACE_INCLUDE_DIRECTORIES "${UTF8PROC_PREFIX}/include") @@ -2273,7 +2250,11 @@ macro(build_utf8proc) endmacro() if(ARROW_WITH_UTF8PROC) - resolve_dependency(utf8proc) + resolve_dependency(utf8proc + REQUIRED_VERSION + "2.2.0" + PC_PACKAGE_NAMES + libutf8proc) add_definitions(-DARROW_WITH_UTF8PROC) @@ -2299,10 +2280,9 @@ macro(build_cares) # If you set -DCARES_SHARED=ON then the build system names the library # libcares_static.a - set( - CARES_STATIC_LIB - "${CARES_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}cares${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(CARES_STATIC_LIB + "${CARES_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}cares${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(CARES_CMAKE_ARGS "${EP_COMMON_CMAKE_ARGS}" @@ -2329,8 +2309,8 @@ macro(build_cares) if(APPLE) # libresolv must be linked from c-ares version 1.16.1 find_library(LIBRESOLV_LIBRARY NAMES resolv libresolv REQUIRED) - set_target_properties(c-ares::cares - PROPERTIES INTERFACE_LINK_LIBRARIES "${LIBRESOLV_LIBRARY}") + set_target_properties(c-ares::cares PROPERTIES INTERFACE_LINK_LIBRARIES + "${LIBRESOLV_LIBRARY}") endif() set(CARES_VENDORED TRUE) @@ -2338,29 +2318,19 @@ macro(build_cares) list(APPEND ARROW_BUNDLED_STATIC_LIBS c-ares::cares) endmacro() -if(ARROW_WITH_GRPC) - if(c-ares_SOURCE STREQUAL "AUTO") - find_package(c-ares QUIET CONFIG) - if(c-ares_FOUND) - set(CARES_INCLUDE_DIR ${c-ares_INCLUDE_DIR}) - else() - build_cares() - endif() - elseif(c-ares_SOURCE STREQUAL "BUNDLED") - build_cares() - elseif(c-ares_SOURCE STREQUAL "SYSTEM") - find_package(c-ares REQUIRED CONFIG) - set(CARES_INCLUDE_DIR ${c-ares_INCLUDE_DIR}) - endif() 
- - # TODO: Don't use global includes but rather target_include_directories - include_directories(SYSTEM ${CARES_INCLUDE_DIR}) -endif() - # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC macro(build_grpc) + resolve_dependency(c-ares + HAVE_ALT + TRUE + PC_PACKAGE_NAMES + libcares) + # TODO: Don't use global includes but rather target_include_directories + get_target_property(c-ares_INCLUDE_DIR c-ares::cares INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${c-ares_INCLUDE_DIR}) + message(STATUS "Building gRPC from source") # First need to build Abseil @@ -2401,13 +2371,12 @@ macro(build_grpc) raw_logging_internal) foreach(_ABSL_LIB ${_ABSL_LIBS}) - set( - _ABSL_STATIC_LIBRARY - "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(_ABSL_STATIC_LIBRARY + "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) add_library(absl::${_ABSL_LIB} STATIC IMPORTED) - set_target_properties(absl::${_ABSL_LIB} - PROPERTIES IMPORTED_LOCATION ${_ABSL_STATIC_LIBRARY}) + set_target_properties(absl::${_ABSL_LIB} PROPERTIES IMPORTED_LOCATION + ${_ABSL_STATIC_LIBRARY}) list(APPEND ABSL_BUILD_BYPRODUCTS ${_ABSL_STATIC_LIBRARY}) list(APPEND ABSL_LIBRARIES absl::${_ABSL_LIB}) endforeach() @@ -2423,23 +2392,21 @@ macro(build_grpc) set(GRPC_HOME "${GRPC_PREFIX}") set(GRPC_INCLUDE_DIR "${GRPC_PREFIX}/include") - set( - GRPC_STATIC_LIBRARY_GPR - "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}") - set( - GRPC_STATIC_LIBRARY_GRPC - "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set( - GRPC_STATIC_LIBRARY_GRPCPP - "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - GRPC_STATIC_LIBRARY_ADDRESS_SORTING - "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}" - 
) - set( - GRPC_STATIC_LIBRARY_UPB - "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GRPC_STATIC_LIBRARY_GPR + "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(GRPC_STATIC_LIBRARY_GRPC + "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(GRPC_STATIC_LIBRARY_GRPCPP + "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(GRPC_STATIC_LIBRARY_ADDRESS_SORTING + "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(GRPC_STATIC_LIBRARY_UPB + "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(GRPC_CPP_PLUGIN "${GRPC_PREFIX}/bin/grpc_cpp_plugin${CMAKE_EXECUTABLE_SUFFIX}") set(GRPC_CMAKE_PREFIX) @@ -2548,31 +2515,38 @@ macro(build_grpc) PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GPR}" INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") - add_library(gRPC::grpc STATIC IMPORTED) - set_target_properties(gRPC::grpc - PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPC}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") - add_library(gRPC::address_sorting STATIC IMPORTED) set_target_properties(gRPC::address_sorting PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_ADDRESS_SORTING}" INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + add_library(gRPC::grpc STATIC IMPORTED) + set(GRPC_LINK_LIBRARIES + gRPC::gpr + gRPC::upb + gRPC::address_sorting + ${ABSL_LIBRARIES} + re2::re2 + c-ares::cares + ZLIB::ZLIB + OpenSSL::SSL + Threads::Threads) + set_target_properties(gRPC::grpc + PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPC}" + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${GRPC_LINK_LIBRARIES}") + add_library(gRPC::grpc++ STATIC IMPORTED) - set_target_properties( - gRPC::grpc++ - PROPERTIES - IMPORTED_LOCATION - "${GRPC_STATIC_LIBRARY_GRPCPP}" - 
INTERFACE_LINK_LIBRARIES - "gRPC::grpc;gRPC::gpr;gRPC::upb;gRPC::address_sorting;${ABSL_LIBRARIES};Threads::Threads" - INTERFACE_INCLUDE_DIRECTORIES - "${GRPC_INCLUDE_DIR}") + set(GRPCPP_LINK_LIBRARIES gRPC::grpc ${ARROW_PROTOBUF_LIBPROTOBUF}) + set_target_properties(gRPC::grpc++ + PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPCPP}" + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${GRPCPP_LINK_LIBRARIES}") add_executable(gRPC::grpc_cpp_plugin IMPORTED) - set_target_properties(gRPC::grpc_cpp_plugin - PROPERTIES IMPORTED_LOCATION ${GRPC_CPP_PLUGIN}) + set_target_properties(gRPC::grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION + ${GRPC_CPP_PLUGIN}) add_dependencies(grpc_ep grpc_dependencies) add_dependencies(toolchain grpc_ep) @@ -2584,15 +2558,11 @@ macro(build_grpc) # continuation character in these scripts, so we have to create a copy of the # static lib that we will bundle later - set( - GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR - "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpcpp${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpcpp${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) add_custom_command(OUTPUT ${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR} - COMMAND ${CMAKE_COMMAND} - -E - copy - $ + COMMAND ${CMAKE_COMMAND} -E copy $ ${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR} DEPENDS grpc_ep) add_library(gRPC::grpcpp_for_bundling STATIC IMPORTED) @@ -2601,17 +2571,18 @@ macro(build_grpc) "${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}") set_source_files_properties("${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}" PROPERTIES GENERATED - TRUE) + TRUE) add_custom_target(grpc_copy_grpc++ ALL DEPENDS "${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}") add_dependencies(gRPC::grpcpp_for_bundling grpc_copy_grpc++) - list(APPEND ARROW_BUNDLED_STATIC_LIBS - ${ABSL_LIBRARIES} - gRPC::upb - gRPC::gpr - gRPC::grpc - gRPC::address_sorting - gRPC::grpcpp_for_bundling) + list(APPEND + ARROW_BUNDLED_STATIC_LIBS 
+ ${ABSL_LIBRARIES} + gRPC::address_sorting + gRPC::gpr + gRPC::grpc + gRPC::grpcpp_for_bundling + gRPC::upb) endmacro() if(ARROW_WITH_GRPC) @@ -2620,16 +2591,12 @@ if(ARROW_WITH_GRPC) HAVE_ALT TRUE REQUIRED_VERSION - ${ARROW_GRPC_REQUIRED_VERSION}) - - if(TARGET gRPC::address_sorting) - set(GRPC_HAS_ADDRESS_SORTING TRUE) - else() - set(GRPC_HAS_ADDRESS_SORTING FALSE) - endif() + ${ARROW_GRPC_REQUIRED_VERSION} + PC_PACKAGE_NAMES + grpc++) # TODO: Don't use global includes but rather target_include_directories - get_target_property(GRPC_INCLUDE_DIR gRPC::grpc INTERFACE_INCLUDE_DIRECTORIES) + get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${GRPC_INCLUDE_DIR}) if(GRPC_VENDORED) @@ -2754,8 +2721,8 @@ endif() macro(build_awssdk) message("Building AWS C++ SDK from source") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS + "4.9") message(FATAL_ERROR "AWS C++ SDK requires gcc >= 4.9") endif() set(AWSSDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/awssdk_ep-install") @@ -2782,14 +2749,14 @@ macro(build_awssdk) "-DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX}" "-DCMAKE_PREFIX_PATH=${AWSSDK_PREFIX}") - set( - AWSSDK_CMAKE_ARGS - ${AWSSDK_COMMON_CMAKE_ARGS} -DBUILD_DEPS=OFF - -DBUILD_ONLY=config\\$s3\\$transfer\\$identity-management\\$sts - -DMINIMIZE_SIZE=ON) + set(AWSSDK_CMAKE_ARGS + ${AWSSDK_COMMON_CMAKE_ARGS} + -DBUILD_DEPS=OFF + -DBUILD_ONLY=config\\$s3\\$transfer\\$identity-management\\$sts + -DMINIMIZE_SIZE=ON) if(UNIX AND TARGET zlib_ep) list(APPEND AWSSDK_CMAKE_ARGS -DZLIB_INCLUDE_DIR=${ZLIB_INCLUDE_DIRS} - -DZLIB_LIBRARY=${ZLIB_LIBRARIES}) + -DZLIB_LIBRARY=${ZLIB_LIBRARIES}) endif() file(MAKE_DIRECTORY ${AWSSDK_INCLUDE_DIR}) @@ -2810,20 +2777,19 @@ macro(build_awssdk) string(TOUPPER ${_AWSSDK_LIB} _AWSSDK_LIB_UPPER) # AWS-C-COMMON -> AWS_C_COMMON string(REPLACE "-" "_" _AWSSDK_LIB_NAME_PREFIX 
${_AWSSDK_LIB_UPPER}) - set( - _AWSSDK_STATIC_LIBRARY - "${AWSSDK_PREFIX}/${AWSSDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${_AWSSDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + set(_AWSSDK_STATIC_LIBRARY + "${AWSSDK_PREFIX}/${AWSSDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${_AWSSDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) if(${_AWSSDK_LIB} MATCHES "^aws-cpp-sdk-") set(_AWSSDK_TARGET_NAME ${_AWSSDK_LIB}) else() set(_AWSSDK_TARGET_NAME AWS::${_AWSSDK_LIB}) endif() add_library(${_AWSSDK_TARGET_NAME} STATIC IMPORTED) - set_target_properties( - ${_AWSSDK_TARGET_NAME} - PROPERTIES IMPORTED_LOCATION ${_AWSSDK_STATIC_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES - "${AWSSDK_INCLUDE_DIR}") + set_target_properties(${_AWSSDK_TARGET_NAME} + PROPERTIES IMPORTED_LOCATION ${_AWSSDK_STATIC_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES + "${AWSSDK_INCLUDE_DIR}") set("${_AWSSDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AWSSDK_STATIC_LIBRARY}) list(APPEND AWSSDK_LIBRARIES ${_AWSSDK_TARGET_NAME}) endforeach() @@ -2880,7 +2846,7 @@ macro(build_awssdk) set_target_properties(CURL::libcurl PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${CURL_INCLUDE_DIRS}" IMPORTED_LOCATION - "${CURL_LIBRARIES}") + "${CURL_LIBRARIES}") endif() set_property(TARGET aws-cpp-sdk-core APPEND diff --git a/cpp/cmake_modules/UseCython.cmake b/cpp/cmake_modules/UseCython.cmake index 0d4b17d3e57..f2025efb4c9 100644 --- a/cpp/cmake_modules/UseCython.cmake +++ b/cpp/cmake_modules/UseCython.cmake @@ -107,8 +107,9 @@ function(compile_pyx endif() if(NOT WIN32) - if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" - OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo") + string( TOLOWER "${CMAKE_BUILD_TYPE}" build_type ) + if("${build_type}" STREQUAL "debug" + OR "${build_type}" STREQUAL "relwithdebinfo") set(cython_debug_arg "--gdb") endif() endif() @@ -144,6 +145,8 @@ function(compile_pyx ${no_docstrings_arg} ${cython_debug_arg} ${CYTHON_FLAGS} + # Necessary for autodoc of function arguments + --directive embedsignature=True # Necessary for Cython code 
coverage --working ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake index 781bec436f3..06ac4dd075d 100644 --- a/cpp/cmake_modules/Usevcpkg.cmake +++ b/cpp/cmake_modules/Usevcpkg.cmake @@ -22,7 +22,9 @@ message(STATUS "Using vcpkg to find dependencies") # macro to list subdirectirectories (non-recursive) macro(list_subdirs SUBDIRS DIR) - file(GLOB children_ RELATIVE ${DIR} ${DIR}/*) + file(GLOB children_ + RELATIVE ${DIR} + ${DIR}/*) set(subdirs_ "") foreach(child_ ${children_}) if(IS_DIRECTORY "${DIR}/${child_}") @@ -44,24 +46,27 @@ if(DEFINED CMAKE_TOOLCHAIN_FILE) get_filename_component(_VCPKG_BUILDSYSTEMS_DIR "${CMAKE_TOOLCHAIN_FILE}" DIRECTORY) get_filename_component(VCPKG_ROOT "${_VCPKG_BUILDSYSTEMS_DIR}/../.." ABSOLUTE) else() - message( - FATAL_ERROR - "vcpkg toolchain file not found at path specified in -DCMAKE_TOOLCHAIN_FILE") + message(FATAL_ERROR "vcpkg toolchain file not found at path specified in -DCMAKE_TOOLCHAIN_FILE" + ) endif() else() if(DEFINED VCPKG_ROOT) # Get it from the CMake variable VCPKG_ROOT - find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + find_program(_VCPKG_BIN vcpkg + PATHS "${VCPKG_ROOT}" + NO_DEFAULT_PATH) if(NOT _VCPKG_BIN) message(FATAL_ERROR "vcpkg not found in directory specified in -DVCPKG_ROOT") endif() elseif(DEFINED ENV{VCPKG_ROOT}) # Get it from the environment variable VCPKG_ROOT set(VCPKG_ROOT $ENV{VCPKG_ROOT}) - find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + find_program(_VCPKG_BIN vcpkg + PATHS "${VCPKG_ROOT}" + NO_DEFAULT_PATH) if(NOT _VCPKG_BIN) - message( - FATAL_ERROR "vcpkg not found in directory in environment variable VCPKG_ROOT") + message(FATAL_ERROR "vcpkg not found in directory in environment variable VCPKG_ROOT" + ) endif() else() # Get it from the file vcpkg.path.txt @@ -78,12 +83,13 @@ else() if(EXISTS "${_VCPKG_PATH_TXT}") file(READ "${_VCPKG_PATH_TXT}" VCPKG_ROOT) else() - message( - FATAL_ERROR - 
"vcpkg not found. Install vcpkg if not installed, " - "then run vcpkg integrate install or set environment variable VCPKG_ROOT.") + message(FATAL_ERROR "vcpkg not found. Install vcpkg if not installed, " + "then run vcpkg integrate install or set environment variable VCPKG_ROOT." + ) endif() - find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + find_program(_VCPKG_BIN vcpkg + PATHS "${VCPKG_ROOT}" + NO_DEFAULT_PATH) if(NOT _VCPKG_BIN) message(FATAL_ERROR "vcpkg not found. Re-run vcpkg integrate install " "or set environment variable VCPKG_ROOT.") @@ -105,7 +111,9 @@ if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET) endif() # Explicitly set manifest mode on if it is not set and vcpkg.json exists if(NOT DEFINED VCPKG_MANIFEST_MODE AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json") - set(VCPKG_MANIFEST_MODE ON CACHE BOOL "Use vcpkg.json manifest") + set(VCPKG_MANIFEST_MODE + ON + CACHE BOOL "Use vcpkg.json manifest") message(STATUS "vcpkg.json manifest found. Using VCPKG_MANIFEST_MODE: ON") endif() # vcpkg can install packages in three different places @@ -113,13 +121,7 @@ set(_INST_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed") # try here fi set(_INST_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg_installed") # try here second set(_INST_VCPKG_ROOT "${VCPKG_ROOT}/installed") # Iterate over the places -foreach(_INST_DIR - IN - LISTS - _INST_BUILD_DIR - _INST_SOURCE_DIR - _INST_VCPKG_ROOT - "notfound") +foreach(_INST_DIR IN LISTS _INST_BUILD_DIR _INST_SOURCE_DIR _INST_VCPKG_ROOT "notfound") if(_INST_DIR STREQUAL "notfound") message(FATAL_ERROR "vcpkg installed libraries directory not found. " "Install packages with vcpkg before executing cmake.") @@ -158,10 +160,8 @@ if(NOT DEFINED VCPKG_TARGET_TRIPLET) message(FATAL_ERROR "Could not infer VCPKG_TARGET_TRIPLET. 
" "Specify triplet with -DVCPKG_TARGET_TRIPLET.") elseif(NOT DEFINED _VCPKG_INSTALLED_DIR) - message( - FATAL_ERROR - "Could not find installed vcpkg packages for triplet ${VCPKG_TARGET_TRIPLET}. " - "Install packages with vcpkg before executing cmake.") + message(FATAL_ERROR "Could not find installed vcpkg packages for triplet ${VCPKG_TARGET_TRIPLET}. " + "Install packages with vcpkg before executing cmake.") endif() set(VCPKG_TARGET_TRIPLET @@ -194,24 +194,56 @@ set(ARROW_VCPKG_PREFIX "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}" CACHE PATH "Path to target triplet subdirectory in vcpkg installed directory") -set(ARROW_VCPKG ON CACHE BOOL "Use vcpkg for dependencies") +set(ARROW_VCPKG + ON + CACHE BOOL "Use vcpkg for dependencies") set(ARROW_DEPENDENCY_SOURCE "SYSTEM" CACHE STRING "The specified value VCPKG is implemented internally as SYSTEM" FORCE) -set(BOOST_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "") -set(BOOST_INCLUDEDIR "${ARROW_VCPKG_PREFIX}/include/boost" CACHE STRING "") -set(BOOST_LIBRARYDIR "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") -set(OPENSSL_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "") -set(OPENSSL_LIBRARIES "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") -set(OPENSSL_ROOT_DIR "${ARROW_VCPKG_PREFIX}" CACHE STRING "") -set(Thrift_ROOT "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") -set(ZSTD_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "") -set(ZSTD_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "") +set(BOOST_ROOT + "${ARROW_VCPKG_PREFIX}" + CACHE STRING "") +set(BOOST_INCLUDEDIR + "${ARROW_VCPKG_PREFIX}/include/boost" + CACHE STRING "") +set(BOOST_LIBRARYDIR + "${ARROW_VCPKG_PREFIX}/lib" + CACHE STRING "") +set(OPENSSL_INCLUDE_DIR + "${ARROW_VCPKG_PREFIX}/include" + CACHE STRING "") +set(OPENSSL_LIBRARIES + "${ARROW_VCPKG_PREFIX}/lib" + CACHE STRING "") +set(OPENSSL_ROOT_DIR + "${ARROW_VCPKG_PREFIX}" + CACHE STRING "") +set(Thrift_ROOT + "${ARROW_VCPKG_PREFIX}/lib" + CACHE STRING "") +set(ZSTD_INCLUDE_DIR + 
"${ARROW_VCPKG_PREFIX}/include" + CACHE STRING "") +set(ZSTD_ROOT + "${ARROW_VCPKG_PREFIX}" + CACHE STRING "") +set(BROTLI_ROOT + "${ARROW_VCPKG_PREFIX}" + CACHE STRING "") +set(LZ4_ROOT + "${ARROW_VCPKG_PREFIX}" + CACHE STRING "") if(CMAKE_HOST_WIN32) - set(LZ4_MSVC_LIB_PREFIX "" CACHE STRING "") - set(LZ4_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING "") - set(ZSTD_MSVC_LIB_PREFIX "" CACHE STRING "") + set(LZ4_MSVC_LIB_PREFIX + "" + CACHE STRING "") + set(LZ4_MSVC_STATIC_LIB_SUFFIX + "" + CACHE STRING "") + set(ZSTD_MSVC_LIB_PREFIX + "" + CACHE STRING "") endif() diff --git a/cpp/cmake_modules/san-config.cmake b/cpp/cmake_modules/san-config.cmake index 5eee6278009..bde9af23e57 100644 --- a/cpp/cmake_modules/san-config.cmake +++ b/cpp/cmake_modules/san-config.cmake @@ -20,10 +20,8 @@ endif() if(${ARROW_USE_ASAN}) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" - OR (CMAKE_CXX_COMPILER_ID - STREQUAL - "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.8")) + OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION + VERSION_GREATER "4.8")) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -DADDRESS_SANITIZER") else() message(SEND_ERROR "Cannot use ASAN without clang or gcc >= 4.8") @@ -41,18 +39,16 @@ endif() # (https://bugs.llvm.org/show_bug.cgi?id=17000#c1) # Note: GCC does not support the 'function' flag. 
if(${ARROW_USE_UBSAN}) - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set( - CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function,float-divide-by-zero -fno-sanitize-recover=all" - ) - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "5.1") - set( - CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all" - ) + if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function,float-divide-by-zero -fno-sanitize-recover=all" + ) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION + VERSION_GREATER_EQUAL "5.1") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all" + ) else() message(SEND_ERROR "Cannot use UBSAN without clang or gcc >= 5.1") endif() @@ -61,14 +57,10 @@ endif() # Flag to enable thread sanitizer (clang or gcc 4.8) if(${ARROW_USE_TSAN}) if(NOT - (CMAKE_CXX_COMPILER_ID - STREQUAL - "AppleClang" + (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" - OR (CMAKE_CXX_COMPILER_ID - STREQUAL - "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.8"))) + OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION + VERSION_GREATER "4.8"))) message(SEND_ERROR "Cannot use TSAN without clang or gcc >= 4.8") endif() @@ -100,34 +92,31 @@ if(${ARROW_USE_TSAN}) endif() if(${ARROW_USE_COVERAGE}) - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - add_definitions( - "-fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep" - ) + if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") + 
add_definitions("-fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep" + ) - set( - CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep" - ) + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep" + ) else() message(SEND_ERROR "You can only enable coverage with clang") endif() endif() -if("${ARROW_USE_UBSAN}" OR "${ARROW_USE_ASAN}" OR "${ARROW_USE_TSAN}") +if("${ARROW_USE_UBSAN}" + OR "${ARROW_USE_ASAN}" + OR "${ARROW_USE_TSAN}") # GCC 4.8 and 4.9 (latest as of this writing) don't allow you to specify # disallowed entries for the sanitizer. - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" - OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set( - CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitizer-disallowed-entries.txt" - ) + if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitizer-disallowed-entries.txt" + ) else() - message( - WARNING - "GCC does not support specifying a sanitizer disallowed entries list. Known sanitizer check failures will not be suppressed." - ) + message(WARNING "GCC does not support specifying a sanitizer disallowed entries list. Known sanitizer check failures will not be suppressed." 
+ ) endif() endif() diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index 1abbf52ac3e..0bcf5de0ad1 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -17,6 +17,10 @@ ADD_ARROW_EXAMPLE(row_wise_conversion_example) +if (ARROW_COMPUTE) + ADD_ARROW_EXAMPLE(compute_register_example) +endif() + if (ARROW_PARQUET AND ARROW_DATASET) if (ARROW_BUILD_SHARED) set(DATASET_EXAMPLES_LINK_LIBS arrow_dataset_shared) @@ -28,4 +32,9 @@ if (ARROW_PARQUET AND ARROW_DATASET) EXTRA_LINK_LIBS ${DATASET_EXAMPLES_LINK_LIBS}) add_dependencies(dataset_parquet_scan_example parquet) + + ADD_ARROW_EXAMPLE(dataset_documentation_example + EXTRA_LINK_LIBS + ${DATASET_EXAMPLES_LINK_LIBS}) + add_dependencies(dataset_documentation_example parquet) endif() diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc new file mode 100644 index 00000000000..3c20a3d2a87 --- /dev/null +++ b/cpp/examples/arrow/compute_register_example.cc @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include +#include +#include + +// Demonstrate registering an Arrow compute function outside of the Arrow source tree + +namespace cp = ::arrow::compute; + +#define ABORT_ON_FAILURE(expr) \ + do { \ + arrow::Status status_ = (expr); \ + if (!status_.ok()) { \ + std::cerr << status_.message() << std::endl; \ + abort(); \ + } \ + } while (0); + +class ExampleFunctionOptionsType : public cp::FunctionOptionsType { + const char* type_name() const override { return "ExampleFunctionOptionsType"; } + std::string Stringify(const cp::FunctionOptions&) const override { + return "ExampleFunctionOptionsType"; + } + bool Compare(const cp::FunctionOptions&, const cp::FunctionOptions&) const override { + return true; + } + // optional: support for serialization + // Result> Serialize(const FunctionOptions&) const override; + // Result> Deserialize(const Buffer&) const override; +}; + +cp::FunctionOptionsType* GetExampleFunctionOptionsType() { + static ExampleFunctionOptionsType options_type; + return &options_type; +} + +class ExampleFunctionOptions : public cp::FunctionOptions { + public: + ExampleFunctionOptions() : cp::FunctionOptions(GetExampleFunctionOptionsType()) {} +}; + +arrow::Status ExampleFunctionImpl(cp::KernelContext* ctx, const cp::ExecBatch& batch, + arrow::Datum* out) { + *out->mutable_array() = *batch[0].array(); + return arrow::Status::OK(); +} + +const cp::FunctionDoc func_doc{ + "Example function to demonstrate registering an out-of-tree function", + "", + {"x"}, + "ExampleFunctionOptions"}; + +int main(int argc, char** argv) { + const std::string name = "compute_register_example"; + auto func = std::make_shared(name, cp::Arity::Unary(), &func_doc); + ABORT_ON_FAILURE(func->AddKernel({cp::InputType::Array(arrow::int64())}, arrow::int64(), + ExampleFunctionImpl)); + + auto registry = cp::GetFunctionRegistry(); + ABORT_ON_FAILURE(registry->AddFunction(std::move(func))); + + arrow::Int64Builder 
builder(arrow::default_memory_pool()); + std::shared_ptr arr; + ABORT_ON_FAILURE(builder.Append(42)); + ABORT_ON_FAILURE(builder.Finish(&arr)); + auto options = std::make_shared(); + auto maybe_result = cp::CallFunction(name, {arr}, options.get()); + ABORT_ON_FAILURE(maybe_result.status()); + + std::cout << maybe_result->make_array()->ToString() << std::endl; + + // Expression serialization will raise NotImplemented if an expression includes + // FunctionOptions for which serialization is not supported. + auto expr = cp::call(name, {}, options); + auto maybe_serialized = cp::Serialize(expr); + std::cerr << maybe_serialized.status().ToString() << std::endl; + + return EXIT_SUCCESS; +} diff --git a/cpp/examples/arrow/dataset_documentation_example.cc b/cpp/examples/arrow/dataset_documentation_example.cc new file mode 100644 index 00000000000..1aac66d4a6c --- /dev/null +++ b/cpp/examples/arrow/dataset_documentation_example.cc @@ -0,0 +1,374 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This example showcases various ways to work with Datasets. It's +// intended to be paired with the documentation. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace ds = arrow::dataset; +namespace fs = arrow::fs; +namespace cp = arrow::compute; + +#define ABORT_ON_FAILURE(expr) \ + do { \ + arrow::Status status_ = (expr); \ + if (!status_.ok()) { \ + std::cerr << status_.message() << std::endl; \ + abort(); \ + } \ + } while (0); + +// (Doc section: Reading Datasets) +// Generate some data for the rest of this example. +std::shared_ptr CreateTable() { + auto schema = + arrow::schema({arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()), + arrow::field("c", arrow::int64())}); + std::shared_ptr array_a; + std::shared_ptr array_b; + std::shared_ptr array_c; + arrow::NumericBuilder builder; + ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); + ABORT_ON_FAILURE(builder.Finish(&array_a)); + builder.Reset(); + ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0})); + ABORT_ON_FAILURE(builder.Finish(&array_b)); + builder.Reset(); + ABORT_ON_FAILURE(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2})); + ABORT_ON_FAILURE(builder.Finish(&array_c)); + return arrow::Table::Make(schema, {array_a, array_b, array_c}); +} + +// Set up a dataset by writing two Parquet files. 
+std::string CreateExampleParquetDataset(const std::shared_ptr& filesystem, + const std::string& root_path) { + auto base_path = root_path + "/parquet_dataset"; + ABORT_ON_FAILURE(filesystem->CreateDir(base_path)); + // Create an Arrow Table + auto table = CreateTable(); + // Write it into two Parquet files + auto output = filesystem->OpenOutputStream(base_path + "/data1.parquet").ValueOrDie(); + ABORT_ON_FAILURE(parquet::arrow::WriteTable( + *table->Slice(0, 5), arrow::default_memory_pool(), output, /*chunk_size=*/2048)); + output = filesystem->OpenOutputStream(base_path + "/data2.parquet").ValueOrDie(); + ABORT_ON_FAILURE(parquet::arrow::WriteTable( + *table->Slice(5), arrow::default_memory_pool(), output, /*chunk_size=*/2048)); + return base_path; +} +// (Doc section: Reading Datasets) + +// (Doc section: Reading different file formats) +// Set up a dataset by writing two Feather files. +std::string CreateExampleFeatherDataset(const std::shared_ptr& filesystem, + const std::string& root_path) { + auto base_path = root_path + "/feather_dataset"; + ABORT_ON_FAILURE(filesystem->CreateDir(base_path)); + // Create an Arrow Table + auto table = CreateTable(); + // Write it into two Feather files + auto output = filesystem->OpenOutputStream(base_path + "/data1.feather").ValueOrDie(); + auto writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie(); + ABORT_ON_FAILURE(writer->WriteTable(*table->Slice(0, 5))); + ABORT_ON_FAILURE(writer->Close()); + output = filesystem->OpenOutputStream(base_path + "/data2.feather").ValueOrDie(); + writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie(); + ABORT_ON_FAILURE(writer->WriteTable(*table->Slice(5))); + ABORT_ON_FAILURE(writer->Close()); + return base_path; +} +// (Doc section: Reading different file formats) + +// (Doc section: Reading and writing partitioned data) +// Set up a dataset by writing files with partitioning +std::string CreateExampleParquetHivePartitionedDataset( + 
const std::shared_ptr& filesystem, const std::string& root_path) { + auto base_path = root_path + "/parquet_dataset"; + ABORT_ON_FAILURE(filesystem->CreateDir(base_path)); + // Create an Arrow Table + auto schema = arrow::schema( + {arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()), + arrow::field("c", arrow::int64()), arrow::field("part", arrow::utf8())}); + std::vector> arrays(4); + arrow::NumericBuilder builder; + ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); + ABORT_ON_FAILURE(builder.Finish(&arrays[0])); + builder.Reset(); + ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0})); + ABORT_ON_FAILURE(builder.Finish(&arrays[1])); + builder.Reset(); + ABORT_ON_FAILURE(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2})); + ABORT_ON_FAILURE(builder.Finish(&arrays[2])); + arrow::StringBuilder string_builder; + ABORT_ON_FAILURE( + string_builder.AppendValues({"a", "a", "a", "a", "a", "b", "b", "b", "b", "b"})); + ABORT_ON_FAILURE(string_builder.Finish(&arrays[3])); + auto table = arrow::Table::Make(schema, arrays); + // Write it using Datasets + auto dataset = std::make_shared(table); + auto scanner_builder = dataset->NewScan().ValueOrDie(); + auto scanner = scanner_builder->Finish().ValueOrDie(); + + // The partition schema determines which fields are part of the partitioning. + auto partition_schema = arrow::schema({arrow::field("part", arrow::utf8())}); + // We'll use Hive-style partitioning, which creates directories with "key=value" pairs. + auto partitioning = std::make_shared(partition_schema); + // We'll write Parquet files. 
+ auto format = std::make_shared(); + ds::FileSystemDatasetWriteOptions write_options; + write_options.file_write_options = format->DefaultWriteOptions(); + write_options.filesystem = filesystem; + write_options.base_dir = base_path; + write_options.partitioning = partitioning; + write_options.basename_template = "part{i}.parquet"; + ABORT_ON_FAILURE(ds::FileSystemDataset::Write(write_options, scanner)); + return base_path; +} +// (Doc section: Reading and writing partitioned data) + +// (Doc section: Dataset discovery) +// Read the whole dataset with the given format, without partitioning. +std::shared_ptr ScanWholeDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + // Create a dataset by scanning the filesystem for files + fs::FileSelector selector; + selector.base_dir = base_dir; + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, + ds::FileSystemFactoryOptions()) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + // Print out the fragments + for (const auto& fragment : dataset->GetFragments().ValueOrDie()) { + std::cout << "Found fragment: " << (*fragment)->ToString() << std::endl; + } + // Read the entire dataset as a Table + auto scan_builder = dataset->NewScan().ValueOrDie(); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Dataset discovery) + +// (Doc section: Filtering data) +// Read a dataset, but select only column "b" and only rows where b < 4. +// +// This is useful when you only want a few columns from a dataset. Where possible, +// Datasets will push down the column selection such that less work is done. 
+std::shared_ptr FilterAndSelectDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + fs::FileSelector selector; + selector.base_dir = base_dir; + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, + ds::FileSystemFactoryOptions()) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + // Read specified columns with a row filter + auto scan_builder = dataset->NewScan().ValueOrDie(); + ABORT_ON_FAILURE(scan_builder->Project({"b"})); + ABORT_ON_FAILURE(scan_builder->Filter(cp::less(cp::field_ref("b"), cp::literal(4)))); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Filtering data) + +// (Doc section: Projecting columns) +// Read a dataset, but with column projection. +// +// This is useful to derive new columns from existing data. For example, here we +// demonstrate casting a column to a different type, and turning a numeric column into a +// boolean column based on a predicate. You could also rename columns or perform +// computations involving multiple columns. +std::shared_ptr ProjectDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + fs::FileSelector selector; + selector.base_dir = base_dir; + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, + ds::FileSystemFactoryOptions()) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + // Read specified columns with a row filter + auto scan_builder = dataset->NewScan().ValueOrDie(); + ABORT_ON_FAILURE(scan_builder->Project( + { + // Leave column "a" as-is. + cp::field_ref("a"), + // Cast column "b" to float32. + cp::call("cast", {cp::field_ref("b")}, + arrow::compute::CastOptions::Safe(arrow::float32())), + // Derive a boolean column from "c". 
+ cp::equal(cp::field_ref("c"), cp::literal(1)), + }, + {"a_renamed", "b_as_float32", "c_1"})); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Projecting columns) + +// (Doc section: Projecting columns #2) +// Read a dataset, but with column projection. +// +// This time, we read all original columns plus one derived column. This simply combines +// the previous two examples: selecting a subset of columns by name, and deriving new +// columns with an expression. +std::shared_ptr SelectAndProjectDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + fs::FileSelector selector; + selector.base_dir = base_dir; + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, + ds::FileSystemFactoryOptions()) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + // Read specified columns with a row filter + auto scan_builder = dataset->NewScan().ValueOrDie(); + std::vector names; + std::vector exprs; + // Read all the original columns. + for (const auto& field : dataset->schema()->fields()) { + names.push_back(field->name()); + exprs.push_back(cp::field_ref(field->name())); + } + // Also derive a new column. + names.emplace_back("b_large"); + exprs.push_back(cp::greater(cp::field_ref("b"), cp::literal(1))); + ABORT_ON_FAILURE(scan_builder->Project(exprs, names)); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Projecting columns #2) + +// (Doc section: Reading and writing partitioned data #2) +// Read an entire dataset, but with partitioning information. 
+std::shared_ptr ScanPartitionedDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + fs::FileSelector selector; + selector.base_dir = base_dir; + selector.recursive = true; // Make sure to search subdirectories + ds::FileSystemFactoryOptions options; + // We'll use Hive-style partitioning. We'll let Arrow Datasets infer the partition + // schema. + options.partitioning = ds::HivePartitioning::MakeFactory(); + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + // Print out the fragments + for (const auto& fragment : dataset->GetFragments().ValueOrDie()) { + std::cout << "Found fragment: " << (*fragment)->ToString() << std::endl; + std::cout << "Partition expression: " + << (*fragment)->partition_expression().ToString() << std::endl; + } + auto scan_builder = dataset->NewScan().ValueOrDie(); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Reading and writing partitioned data #2) + +// (Doc section: Reading and writing partitioned data #3) +// Read an entire dataset, but with partitioning information. Also, filter the dataset on +// the partition values. +std::shared_ptr FilterPartitionedDataset( + const std::shared_ptr& filesystem, + const std::shared_ptr& format, const std::string& base_dir) { + fs::FileSelector selector; + selector.base_dir = base_dir; + selector.recursive = true; + ds::FileSystemFactoryOptions options; + options.partitioning = ds::HivePartitioning::MakeFactory(); + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options) + .ValueOrDie(); + auto dataset = factory->Finish().ValueOrDie(); + auto scan_builder = dataset->NewScan().ValueOrDie(); + // Filter based on the partition values. This will mean that we won't even read the + // files whose partition expressions don't match the filter. 
+ ABORT_ON_FAILURE( + scan_builder->Filter(cp::equal(cp::field_ref("part"), cp::literal("b")))); + auto scanner = scan_builder->Finish().ValueOrDie(); + return scanner->ToTable().ValueOrDie(); +} +// (Doc section: Reading and writing partitioned data #3) + +int main(int argc, char** argv) { + if (argc < 3) { + // Fake success for CI purposes. + return EXIT_SUCCESS; + } + + std::string uri = argv[1]; + std::string format_name = argv[2]; + std::string mode = argc > 3 ? argv[3] : "no_filter"; + std::string root_path; + auto fs = fs::FileSystemFromUri(uri, &root_path).ValueOrDie(); + + std::string base_path; + std::shared_ptr format; + if (format_name == "feather") { + format = std::make_shared(); + base_path = CreateExampleFeatherDataset(fs, root_path); + } else if (format_name == "parquet") { + format = std::make_shared(); + base_path = CreateExampleParquetDataset(fs, root_path); + } else if (format_name == "parquet_hive") { + format = std::make_shared(); + base_path = CreateExampleParquetHivePartitionedDataset(fs, root_path); + } else { + std::cerr << "Unknown format: " << format_name << std::endl; + std::cerr << "Supported formats: feather, parquet, parquet_hive" << std::endl; + return EXIT_FAILURE; + } + + std::shared_ptr table; + if (mode == "no_filter") { + table = ScanWholeDataset(fs, format, base_path); + } else if (mode == "filter") { + table = FilterAndSelectDataset(fs, format, base_path); + } else if (mode == "project") { + table = ProjectDataset(fs, format, base_path); + } else if (mode == "select_project") { + table = SelectAndProjectDataset(fs, format, base_path); + } else if (mode == "partitioned") { + table = ScanPartitionedDataset(fs, format, base_path); + } else if (mode == "filter_partitioned") { + table = FilterPartitionedDataset(fs, format, base_path); + } else { + std::cerr << "Unknown mode: " << mode << std::endl; + std::cerr + << "Supported modes: no_filter, filter, project, select_project, partitioned" + << std::endl; + return EXIT_FAILURE; + 
} + std::cout << "Read " << table->num_rows() << " rows" << std::endl; + std::cout << table->ToString() << std::endl; + return EXIT_SUCCESS; +} diff --git a/cpp/examples/arrow/dataset_parquet_scan_example.cc b/cpp/examples/arrow/dataset_parquet_scan_example.cc index 197ca5aa4c6..cd9b89fe380 100644 --- a/cpp/examples/arrow/dataset_parquet_scan_example.cc +++ b/cpp/examples/arrow/dataset_parquet_scan_example.cc @@ -16,9 +16,9 @@ // under the License. #include +#include #include #include -#include #include #include #include @@ -37,6 +37,8 @@ namespace fs = arrow::fs; namespace ds = arrow::dataset; +namespace cp = arrow::compute; + #define ABORT_ON_FAILURE(expr) \ do { \ arrow::Status status_ = (expr); \ @@ -60,8 +62,8 @@ struct Configuration { // Indicates the filter by which rows will be filtered. This optimization can // make use of partition information and/or file metadata if possible. - ds::Expression filter = - ds::greater(ds::field_ref("total_amount"), ds::literal(1000.0f)); + cp::Expression filter = + cp::greater(cp::field_ref("total_amount"), cp::literal(1000.0f)); ds::InspectOptions inspect_options{}; ds::FinishOptions finish_options{}; @@ -146,7 +148,7 @@ std::shared_ptr GetDatasetFromPath( std::shared_ptr GetScannerFromDataset(std::shared_ptr dataset, std::vector columns, - ds::Expression filter, + cp::Expression filter, bool use_threads) { auto scanner_builder = dataset->NewScan().ValueOrDie(); diff --git a/cpp/examples/minimal_build/example.cc b/cpp/examples/minimal_build/example.cc index e1b5c123a85..2ca163155ee 100644 --- a/cpp/examples/minimal_build/example.cc +++ b/cpp/examples/minimal_build/example.cc @@ -38,8 +38,7 @@ Status RunMain(int argc, char** argv) { arrow::io::ReadableFile::Open(csv_filename)); ARROW_ASSIGN_OR_RAISE( auto csv_reader, - arrow::csv::TableReader::Make(arrow::default_memory_pool(), - arrow::io::default_io_context(), + arrow::csv::TableReader::Make(arrow::io::default_io_context(), input_file, arrow::csv::ReadOptions::Defaults(), 
arrow::csv::ParseOptions::Defaults(), diff --git a/cpp/examples/minimal_build/minimal.dockerfile b/cpp/examples/minimal_build/minimal.dockerfile index 95f73e9a549..9361fc5e81d 100644 --- a/cpp/examples/minimal_build/minimal.dockerfile +++ b/cpp/examples/minimal_build/minimal.dockerfile @@ -22,5 +22,6 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ build-essential \ - cmake && \ + cmake \ + pkg-config && \ apt-get clean && rm -rf /var/lib/apt/lists* diff --git a/cpp/examples/minimal_build/run_static.sh b/cpp/examples/minimal_build/run_static.sh index 05804a0366c..ff3bb894570 100755 --- a/cpp/examples/minimal_build/run_static.sh +++ b/cpp/examples/minimal_build/run_static.sh @@ -67,10 +67,12 @@ popd echo echo "==" +echo "== CMake:" echo "== Building example project using Arrow C++ library" echo "==" echo +rm -rf $EXAMPLE_BUILD_DIR mkdir -p $EXAMPLE_BUILD_DIR pushd $EXAMPLE_BUILD_DIR @@ -81,10 +83,39 @@ popd echo echo "==" +echo "== CMake:" echo "== Running example project" echo "==" echo pushd $EXAMPLE_DIR -${EXAMPLE_BUILD_DIR}/arrow_example +$EXAMPLE_BUILD_DIR/arrow_example + +echo +echo "==" +echo "== pkg-config" +echo "== Building example project using Arrow C++ library" +echo "==" +echo + +rm -rf $EXAMPLE_BUILD_DIR +mkdir -p $EXAMPLE_BUILD_DIR +${CXX:-c++} \ + -o $EXAMPLE_BUILD_DIR/arrow_example \ + $EXAMPLE_DIR/example.cc \ + $(PKG_CONFIG_PATH=$ARROW_BUILD_DIR/lib/pkgconfig \ + pkg-config --cflags --libs --static arrow) + +popd + +echo +echo "==" +echo "== pkg-config:" +echo "== Running example project" +echo "==" +echo + +pushd $EXAMPLE_DIR + +$EXAMPLE_BUILD_DIR/arrow_example diff --git a/cpp/examples/minimal_build/system_dependency.dockerfile b/cpp/examples/minimal_build/system_dependency.dockerfile index f0b29cef990..926fcaf6f4b 100644 --- a/cpp/examples/minimal_build/system_dependency.dockerfile +++ b/cpp/examples/minimal_build/system_dependency.dockerfile @@ -37,6 +37,7 @@ RUN apt-get 
update -y -q && \ libthrift-dev \ libutf8proc-dev \ libzstd-dev \ + pkg-config \ protobuf-compiler \ rapidjson-dev \ zlib1g-dev && \ diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index df72dcc5b6b..f13e5b1ef75 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -119,6 +119,22 @@ function(ADD_ARROW_BENCHMARK REL_TEST_NAME) ${ARG_UNPARSED_ARGUMENTS}) endfunction() +macro(append_avx2_src SRC) + if(ARROW_HAVE_RUNTIME_AVX2) + list(APPEND ARROW_SRCS ${SRC}) + set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) + set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG}) + endif() +endmacro() + +macro(append_avx512_src SRC) + if(ARROW_HAVE_RUNTIME_AVX512) + list(APPEND ARROW_SRCS ${SRC}) + set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) + set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX512_FLAG}) + endif() +endmacro() + set(ARROW_SRCS array/array_base.cc array/array_binary.cc @@ -172,6 +188,7 @@ set(ARROW_SRCS io/interfaces.cc io/memory.cc io/slow.cc + io/stdio.cc io/transform.cc util/basic_decimal.cc util/bit_block_counter.cc @@ -201,6 +218,7 @@ set(ARROW_SRCS util/thread_pool.cc util/time.cc util/trie.cc + util/unreachable.cc util/uri.cc util/utf8.cc util/value_parsing.cc @@ -215,18 +233,11 @@ set(ARROW_SRCS vendored/double-conversion/diy-fp.cc vendored/double-conversion/strtod.cc) -if(ARROW_HAVE_RUNTIME_AVX2) - list(APPEND ARROW_SRCS util/bpacking_avx2.cc) - set_source_files_properties(util/bpacking_avx2.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON) - set_source_files_properties(util/bpacking_avx2.cc PROPERTIES COMPILE_FLAGS - ${ARROW_AVX2_FLAG}) -endif() -if(ARROW_HAVE_RUNTIME_AVX512) - list(APPEND ARROW_SRCS util/bpacking_avx512.cc) - set_source_files_properties(util/bpacking_avx512.cc PROPERTIES SKIP_PRECOMPILE_HEADERS - ON) - set_source_files_properties(util/bpacking_avx512.cc PROPERTIES COMPILE_FLAGS - ${ARROW_AVX512_FLAG}) 
+append_avx2_src(util/bpacking_avx2.cc) +append_avx512_src(util/bpacking_avx512.cc) + +if(ARROW_HAVE_NEON) + list(APPEND ARROW_SRCS util/bpacking_neon.cc) endif() if(APPLE) @@ -252,11 +263,8 @@ set(ARROW_C_SRCS vendored/uriparser/UriShorten.c) set_source_files_properties(vendored/datetime/tz.cpp - PROPERTIES - SKIP_PRECOMPILE_HEADERS - ON - SKIP_UNITY_BUILD_INCLUSION - ON) + PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) # Disable DLL exports in vendored uriparser library add_definitions(-DURI_STATIC_BUILD) @@ -317,16 +325,12 @@ endif() if(_allocator_dependencies) if("${CMAKE_GENERATOR}" STREQUAL "Ninja") set_source_files_properties(memory_pool.cc PROPERTIES OBJECT_DEPENDS - "${_allocator_dependencies}") + "${_allocator_dependencies}") else() add_dependencies(arrow_dependencies ${_allocator_dependencies}) endif() - set_source_files_properties(memory_pool.cc - PROPERTIES - SKIP_PRECOMPILE_HEADERS - ON - SKIP_UNITY_BUILD_INCLUSION - ON) + set_source_files_properties(memory_pool.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) endif() unset(_allocator_dependencies) @@ -342,14 +346,15 @@ endif() # if(ARROW_CSV) - list(APPEND ARROW_SRCS - csv/converter.cc - csv/chunker.cc - csv/column_builder.cc - csv/column_decoder.cc - csv/options.cc - csv/parser.cc - csv/reader.cc) + list(APPEND + ARROW_SRCS + csv/converter.cc + csv/chunker.cc + csv/column_builder.cc + csv/column_decoder.cc + csv/options.cc + csv/parser.cc + csv/reader.cc) if(ARROW_COMPUTE) list(APPEND ARROW_SRCS csv/writer.cc) endif() @@ -358,56 +363,65 @@ if(ARROW_CSV) endif() if(ARROW_COMPUTE) - list(APPEND ARROW_SRCS - compute/api_aggregate.cc - compute/api_scalar.cc - compute/api_vector.cc - compute/cast.cc - compute/exec.cc - compute/function.cc - compute/kernel.cc - compute/registry.cc - compute/kernels/aggregate_basic.cc - compute/kernels/aggregate_mode.cc - compute/kernels/aggregate_quantile.cc - compute/kernels/aggregate_tdigest.cc - 
compute/kernels/aggregate_var_std.cc - compute/kernels/codegen_internal.cc - compute/kernels/hash_aggregate.cc - compute/kernels/scalar_arithmetic.cc - compute/kernels/scalar_boolean.cc - compute/kernels/scalar_cast_boolean.cc - compute/kernels/scalar_cast_internal.cc - compute/kernels/scalar_cast_nested.cc - compute/kernels/scalar_cast_numeric.cc - compute/kernels/scalar_cast_string.cc - compute/kernels/scalar_cast_temporal.cc - compute/kernels/scalar_compare.cc - compute/kernels/scalar_nested.cc - compute/kernels/scalar_set_lookup.cc - compute/kernels/scalar_string.cc - compute/kernels/scalar_validity.cc - compute/kernels/scalar_fill_null.cc - compute/kernels/util_internal.cc - compute/kernels/vector_hash.cc - compute/kernels/vector_nested.cc - compute/kernels/vector_selection.cc - compute/kernels/vector_sort.cc) - - if(ARROW_HAVE_RUNTIME_AVX2) - list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx2.cc) - set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES - SKIP_PRECOMPILE_HEADERS ON) - set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES - COMPILE_FLAGS ${ARROW_AVX2_FLAG}) - endif() - if(ARROW_HAVE_RUNTIME_AVX512) - list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx512.cc) - set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES - SKIP_PRECOMPILE_HEADERS ON) - set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES - COMPILE_FLAGS ${ARROW_AVX512_FLAG}) - endif() + list(APPEND + ARROW_SRCS + compute/api_aggregate.cc + compute/api_scalar.cc + compute/api_vector.cc + compute/cast.cc + compute/exec.cc + compute/exec/exec_plan.cc + compute/exec/expression.cc + compute/function.cc + compute/function_internal.cc + compute/kernel.cc + compute/registry.cc + compute/kernels/aggregate_basic.cc + compute/kernels/aggregate_mode.cc + compute/kernels/aggregate_quantile.cc + compute/kernels/aggregate_tdigest.cc + compute/kernels/aggregate_var_std.cc + 
compute/kernels/codegen_internal.cc + compute/kernels/hash_aggregate.cc + compute/kernels/scalar_arithmetic.cc + compute/kernels/scalar_boolean.cc + compute/kernels/scalar_cast_boolean.cc + compute/kernels/scalar_cast_dictionary.cc + compute/kernels/scalar_cast_internal.cc + compute/kernels/scalar_cast_nested.cc + compute/kernels/scalar_cast_numeric.cc + compute/kernels/scalar_cast_string.cc + compute/kernels/scalar_cast_temporal.cc + compute/kernels/scalar_compare.cc + compute/kernels/scalar_nested.cc + compute/kernels/scalar_set_lookup.cc + compute/kernels/scalar_string.cc + compute/kernels/scalar_temporal.cc + compute/kernels/scalar_validity.cc + compute/kernels/scalar_fill_null.cc + compute/kernels/scalar_if_else.cc + compute/kernels/util_internal.cc + compute/kernels/vector_hash.cc + compute/kernels/vector_nested.cc + compute/kernels/vector_replace.cc + compute/kernels/vector_selection.cc + compute/kernels/vector_sort.cc + compute/exec/key_hash.cc + compute/exec/key_map.cc + compute/exec/key_compare.cc + compute/exec/key_encode.cc + compute/exec/util.cc) + + append_avx2_src(compute/kernels/aggregate_basic_avx2.cc) + append_avx512_src(compute/kernels/aggregate_basic_avx512.cc) + + append_avx2_src(compute/exec/key_hash_avx2.cc) + append_avx2_src(compute/exec/key_map_avx2.cc) + append_avx2_src(compute/exec/key_compare_avx2.cc) + append_avx2_src(compute/exec/key_encode_avx2.cc) + append_avx2_src(compute/exec/util_avx2.cc) + + list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc) endif() if(ARROW_FILESYSTEM) @@ -415,12 +429,13 @@ if(ARROW_FILESYSTEM) add_definitions(-DARROW_HDFS) endif() - list(APPEND ARROW_SRCS - filesystem/filesystem.cc - filesystem/localfs.cc - filesystem/mockfs.cc - filesystem/path_util.cc - filesystem/util_internal.cc) + list(APPEND + ARROW_SRCS + filesystem/filesystem.cc + filesystem/localfs.cc + filesystem/mockfs.cc + filesystem/path_util.cc + filesystem/util_internal.cc) if(ARROW_HDFS) list(APPEND ARROW_SRCS filesystem/hdfs.cc) @@ 
-428,25 +443,23 @@ if(ARROW_FILESYSTEM) if(ARROW_S3) list(APPEND ARROW_SRCS filesystem/s3fs.cc) set_source_files_properties(filesystem/s3fs.cc - PROPERTIES - SKIP_PRECOMPILE_HEADERS - ON - SKIP_UNITY_BUILD_INCLUSION - ON) + PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) endif() list(APPEND ARROW_TESTING_SRCS filesystem/test_util.cc) endif() if(ARROW_IPC) - list(APPEND ARROW_SRCS - ipc/dictionary.cc - ipc/feather.cc - ipc/message.cc - ipc/metadata_internal.cc - ipc/options.cc - ipc/reader.cc - ipc/writer.cc) + list(APPEND + ARROW_SRCS + ipc/dictionary.cc + ipc/feather.cc + ipc/message.cc + ipc/metadata_internal.cc + ipc/options.cc + ipc/reader.cc + ipc/writer.cc) if(ARROW_JSON) list(APPEND ARROW_SRCS ipc/json_simple.cc) @@ -454,25 +467,23 @@ if(ARROW_IPC) endif() if(ARROW_JSON) - list(APPEND ARROW_SRCS - json/options.cc - json/chunked_builder.cc - json/chunker.cc - json/converter.cc - json/object_parser.cc - json/object_writer.cc - json/parser.cc - json/reader.cc) + list(APPEND + ARROW_SRCS + json/options.cc + json/chunked_builder.cc + json/chunker.cc + json/converter.cc + json/object_parser.cc + json/object_writer.cc + json/parser.cc + json/reader.cc) endif() if(ARROW_ORC) list(APPEND ARROW_SRCS adapters/orc/adapter.cc adapters/orc/adapter_util.cc) endif() -if(NOT APPLE AND NOT MSVC_TOOLCHAIN) - # Localize thirdparty symbols using a linker version script. This hides them - # from the client application. The OS X linker does not support the - # version-script option. 
+if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) set(ARROW_VERSION_SCRIPT_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map") set(ARROW_SHARED_LINK_FLAGS ${ARROW_VERSION_SCRIPT_FLAGS}) @@ -480,6 +491,21 @@ endif() set(ARROW_ALL_SRCS ${ARROW_SRCS} ${ARROW_C_SRCS}) +if(ARROW_BUILD_STATIC AND ARROW_BUNDLED_STATIC_LIBS) + set(ARROW_BUILD_BUNDLED_DEPENDENCIES TRUE) +else() + set(ARROW_BUILD_BUNDLED_DEPENDENCIES FALSE) +endif() + +if(ARROW_BUILD_BUNDLED_DEPENDENCIES) + string(APPEND ARROW_PC_LIBS_PRIVATE " -larrow_bundled_dependencies") +endif() +# Need -latomic on Raspbian. +# See also: https://issues.apache.org/jira/browse/ARROW-12860 +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + string(APPEND ARROW_PC_LIBS_PRIVATE " -latomic") +endif() + add_arrow_lib(arrow CMAKE_PACKAGE_NAME Arrow @@ -512,18 +538,21 @@ if(ARROW_BUILD_STATIC AND WIN32) target_compile_definitions(arrow_static PUBLIC ARROW_STATIC) endif() +foreach(LIB_TARGET ${ARROW_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) +endforeach() + if(ARROW_WITH_BACKTRACE) find_package(Backtrace) foreach(LIB_TARGET ${ARROW_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) if(Backtrace_FOUND AND ARROW_WITH_BACKTRACE) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_WITH_BACKTRACE) endif() endforeach() endif() -if(ARROW_BUILD_STATIC AND ARROW_BUNDLED_STATIC_LIBS) +if(ARROW_BUILD_BUNDLED_DEPENDENCIES) arrow_car(_FIRST_LIB ${ARROW_BUNDLED_STATIC_LIBS}) arrow_cdr(_OTHER_LIBS ${ARROW_BUNDLED_STATIC_LIBS}) create_merged_static_lib(arrow_bundled_dependencies @@ -610,12 +639,8 @@ add_arrow_test(misc_test add_arrow_test(public_api_test) -set_source_files_properties(public_api_test.cc - PROPERTIES - SKIP_PRECOMPILE_HEADERS - ON - SKIP_UNITY_BUILD_INCLUSION - ON) +set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) add_arrow_test(scalar_test) 
add_arrow_test(type_test) diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt index 516196c2eef..ca901b07dfd 100644 --- a/cpp/src/arrow/adapters/orc/CMakeLists.txt +++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt @@ -53,9 +53,5 @@ add_arrow_test(adapter_test STATIC_LINK_LIBS ${ORC_STATIC_TEST_LINK_LIBS}) -set_source_files_properties(adapter_test.cc - PROPERTIES - SKIP_PRECOMPILE_HEADERS - ON - SKIP_UNITY_BUILD_INCLUSION - ON) +set_source_files_properties(adapter_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 2c61f8995de..2f74b40e40d 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -16,10 +16,10 @@ // under the License. #include "arrow/adapters/orc/adapter.h" -#include "arrow/adapters/orc/adapter_util.h" #include #include +#include #include #include #include @@ -27,6 +27,7 @@ #include #include +#include "arrow/adapters/orc/adapter_util.h" #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/io/interfaces.h" @@ -44,20 +45,11 @@ #include "arrow/util/macros.h" #include "arrow/util/range.h" #include "arrow/util/visibility.h" - #include "orc/Exceptions.hh" -#include "orc/OrcFile.hh" // alias to not interfere with nested orc namespace namespace liborc = orc; -namespace arrow { - -using internal::checked_cast; - -namespace adapters { -namespace orc { - #define ORC_THROW_NOT_OK(s) \ do { \ Status _s = (s); \ @@ -77,6 +69,35 @@ namespace orc { ORC_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \ lhs, rexpr); +#define ORC_BEGIN_CATCH_NOT_OK try { +#define ORC_END_CATCH_NOT_OK \ + } \ + catch (const liborc::ParseError& e) { \ + return Status::IOError(e.what()); \ + } \ + catch (const liborc::InvalidArgument& e) { \ + return Status::Invalid(e.what()); \ + } \ + catch (const liborc::NotImplementedYet& e) { \ + 
return Status::NotImplemented(e.what()); \ + } + +#define ORC_CATCH_NOT_OK(_s) \ + ORC_BEGIN_CATCH_NOT_OK(_s); \ + ORC_END_CATCH_NOT_OK + +namespace arrow { +namespace adapters { +namespace orc { + +namespace { + +// The following are required by ORC to be uint64_t +constexpr uint64_t kOrcWriterBatchSize = 128 * 1024; +constexpr uint64_t kOrcNaturalWriteSize = 128 * 1024; + +using internal::checked_cast; + class ArrowInputFile : public liborc::InputStream { public: explicit ArrowInputFile(const std::shared_ptr& file) @@ -129,11 +150,7 @@ class OrcStripeReader : public RecordBatchReader { Status ReadNext(std::shared_ptr* out) override { std::unique_ptr batch; - try { - batch = row_reader_->createRowBatch(batch_size_); - } catch (const liborc::ParseError& e) { - return Status::Invalid(e.what()); - } + ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_)); const liborc::Type& type = row_reader_->getSelectedType(); if (!row_reader_->next(*batch)) { @@ -163,6 +180,8 @@ class OrcStripeReader : public RecordBatchReader { int64_t batch_size_; }; +} // namespace + class ORCFileReader::Impl { public: Impl() {} @@ -172,11 +191,7 @@ class ORCFileReader::Impl { std::unique_ptr io_wrapper(new ArrowInputFile(file)); liborc::ReaderOptions options; std::unique_ptr liborc_reader; - try { - liborc_reader = createReader(std::move(io_wrapper), options); - } catch (const liborc::ParseError& e) { - return Status::IOError(e.what()); - } + ORC_CATCH_NOT_OK(liborc_reader = createReader(std::move(io_wrapper), options)); pool_ = pool; reader_ = std::move(liborc_reader); current_row_ = 0; @@ -209,15 +224,20 @@ class ORCFileReader::Impl { Status ReadSchema(const liborc::RowReaderOptions& opts, std::shared_ptr* out) { std::unique_ptr row_reader; - try { - row_reader = reader_->createRowReader(opts); - } catch (const liborc::ParseError& e) { - return Status::Invalid(e.what()); - } + ORC_CATCH_NOT_OK(row_reader = reader_->createRowReader(opts)); const liborc::Type& type = 
row_reader->getSelectedType(); return GetArrowSchema(type, out); } + Result> ReadMetadata() { + const std::list keys = reader_->getMetadataKeys(); + auto metadata = std::make_shared(); + for (const auto& key : keys) { + metadata->Append(key, reader_->getMetadataValue(key)); + } + return std::const_pointer_cast(metadata); + } + Status GetArrowSchema(const liborc::Type& type, std::shared_ptr* out) { if (type.getKind() != liborc::STRUCT) { return Status::NotImplemented( @@ -232,16 +252,8 @@ class ORCFileReader::Impl { std::string name = type.getFieldName(child); fields.push_back(field(name, elemtype)); } - std::list keys = reader_->getMetadataKeys(); - std::shared_ptr metadata; - if (!keys.empty()) { - metadata = std::make_shared(); - for (auto it = keys.begin(); it != keys.end(); ++it) { - metadata->Append(*it, reader_->getMetadataValue(*it)); - } - } - - *out = std::make_shared(fields, metadata); + ARROW_ASSIGN_OR_RAISE(auto metadata, ReadMetadata()); + *out = std::make_shared(std::move(fields), std::move(metadata)); return Status::OK(); } @@ -342,12 +354,12 @@ class ORCFileReader::Impl { std::shared_ptr* out) { std::unique_ptr row_reader; std::unique_ptr batch; - try { - row_reader = reader_->createRowReader(opts); - batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch)); - } catch (const liborc::ParseError& e) { - return Status::Invalid(e.what()); - } + + ORC_BEGIN_CATCH_NOT_OK + row_reader = reader_->createRowReader(opts); + batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch)); + ORC_END_CATCH_NOT_OK + std::unique_ptr builder; RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder)); @@ -389,13 +401,12 @@ class ORCFileReader::Impl { std::shared_ptr schema; RETURN_NOT_OK(ReadSchema(opts, &schema)); std::unique_ptr row_reader; - try { - row_reader = reader_->createRowReader(opts); - row_reader->seekToRow(current_row_); - current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows; - } catch (const 
liborc::ParseError& e) { - return Status::Invalid(e.what()); - } + + ORC_BEGIN_CATCH_NOT_OK + row_reader = reader_->createRowReader(opts); + row_reader->seekToRow(current_row_); + current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows; + ORC_END_CATCH_NOT_OK *out = std::shared_ptr( new OrcStripeReader(std::move(row_reader), schema, batch_size, pool_)); @@ -425,6 +436,10 @@ Status ORCFileReader::Open(const std::shared_ptr& file, return Status::OK(); } +Result> ORCFileReader::ReadMetadata() { + return impl_->ReadMetadata(); +} + Status ORCFileReader::ReadSchema(std::shared_ptr* out) { return impl_->ReadSchema(out); } @@ -473,6 +488,108 @@ int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); } int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); } +namespace { + +class ArrowOutputStream : public liborc::OutputStream { + public: + explicit ArrowOutputStream(arrow::io::OutputStream& output_stream) + : output_stream_(output_stream), length_(0) {} + + uint64_t getLength() const override { return length_; } + + uint64_t getNaturalWriteSize() const override { return kOrcNaturalWriteSize; } + + void write(const void* buf, size_t length) override { + ORC_THROW_NOT_OK(output_stream_.Write(buf, static_cast(length))); + length_ += static_cast(length); + } + + // Mandatory due to us implementing an ORC virtual class. 
+ // Used by ORC for error messages, not used by Arrow + const std::string& getName() const override { + static const std::string filename("ArrowOutputFile"); + return filename; + } + + void close() override { + if (!output_stream_.closed()) { + ORC_THROW_NOT_OK(output_stream_.Close()); + } + } + + void set_length(int64_t length) { length_ = length; } + + private: + arrow::io::OutputStream& output_stream_; + int64_t length_; +}; + +} // namespace + +class ORCFileWriter::Impl { + public: + Status Open(arrow::io::OutputStream* output_stream) { + out_stream_ = std::unique_ptr( + checked_cast(new ArrowOutputStream(*output_stream))); + return Status::OK(); + } + + Status Write(const Table& table) { + std::unique_ptr orc_options = + std::unique_ptr(new liborc::WriterOptions()); + ARROW_ASSIGN_OR_RAISE(auto orc_schema, GetOrcType(*(table.schema()))); + ORC_CATCH_NOT_OK( + writer_ = liborc::createWriter(*orc_schema, out_stream_.get(), *orc_options)) + + int64_t num_rows = table.num_rows(); + const int num_cols_ = table.num_columns(); + std::vector arrow_index_offset(num_cols_, 0); + std::vector arrow_chunk_offset(num_cols_, 0); + std::unique_ptr batch = + writer_->createRowBatch(kOrcWriterBatchSize); + liborc::StructVectorBatch* root = + internal::checked_cast(batch.get()); + while (num_rows > 0) { + for (int i = 0; i < num_cols_; i++) { + RETURN_NOT_OK(adapters::orc::WriteBatch( + *(table.column(i)), kOrcWriterBatchSize, &(arrow_chunk_offset[i]), + &(arrow_index_offset[i]), (root->fields)[i])); + } + root->numElements = (root->fields)[0]->numElements; + writer_->add(*batch); + batch->clear(); + num_rows -= kOrcWriterBatchSize; + } + return Status::OK(); + } + + Status Close() { + writer_->close(); + return Status::OK(); + } + + private: + std::unique_ptr writer_; + std::unique_ptr out_stream_; +}; + +ORCFileWriter::~ORCFileWriter() {} + +ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); } + +Result> ORCFileWriter::Open( + io::OutputStream* 
output_stream) { + std::unique_ptr result = + std::unique_ptr(new ORCFileWriter()); + Status status = result->impl_->Open(output_stream); + RETURN_NOT_OK(status); + return std::move(result); +} + +Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); } + +Status ORCFileWriter::Close() { return impl_->Close(); } + } // namespace orc } // namespace adapters } // namespace arrow diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h index 9bf18674af4..012c1701980 100644 --- a/cpp/src/arrow/adapters/orc/adapter.h +++ b/cpp/src/arrow/adapters/orc/adapter.h @@ -26,12 +26,11 @@ #include "arrow/record_batch.h" #include "arrow/status.h" #include "arrow/type.h" +#include "arrow/type_fwd.h" #include "arrow/util/visibility.h" namespace arrow { - namespace adapters { - namespace orc { /// \class ORCFileReader @@ -49,6 +48,11 @@ class ARROW_EXPORT ORCFileReader { static Status Open(const std::shared_ptr& file, MemoryPool* pool, std::unique_ptr* reader); + /// \brief Return the metadata read from the ORC file + /// + /// \return A KeyValueMetadata object containing the ORC metadata + Result> ReadMetadata(); + /// \brief Return the schema read from the ORC file /// /// \param[out] out the returned Schema object @@ -142,8 +146,36 @@ class ARROW_EXPORT ORCFileReader { ORCFileReader(); }; -} // namespace orc +/// \class ORCFileWriter +/// \brief Write an Arrow Table or RecordBatch to an ORC file. +class ARROW_EXPORT ORCFileWriter { + public: + ~ORCFileWriter(); + /// \brief Creates a new ORC writer. 
+ /// + /// \param[in] output_stream a pointer to the io::OutputStream to write into + /// \return the returned writer object + static Result> Open(io::OutputStream* output_stream); -} // namespace adapters + /// \brief Write a table + /// + /// \param[in] table the Arrow table from which data is extracted + /// \return Status + Status Write(const Table& table); + + /// \brief Close an ORC writer (orc::Writer) + /// + /// \return Status + Status Close(); + + private: + class Impl; + std::unique_ptr impl_; + private: + ORCFileWriter(); +}; + +} // namespace orc +} // namespace adapters } // namespace arrow diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index 09e47fb7626..9f7fb561362 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -15,20 +15,47 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/adapters/orc/adapter.h" -#include "arrow/array.h" -#include "arrow/io/api.h" #include + #include +#include + +#include "arrow/adapters/orc/adapter_util.h" +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/buffer_builder.h" +#include "arrow/chunked_array.h" +#include "arrow/compute/cast.h" +#include "arrow/io/interfaces.h" +#include "arrow/io/memory.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/type.h" +#include "arrow/util/decimal.h" +#include "arrow/util/key_value_metadata.h" namespace liborc = orc; namespace arrow { -constexpr int DEFAULT_MEM_STREAM_SIZE = 100 * 1024 * 1024; +using internal::checked_pointer_cast; + +constexpr int kDefaultSmallMemStreamSize = 16384 * 5; // 80KB +constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024; +constexpr int64_t kNanoMax = std::numeric_limits::max(); +constexpr int64_t kNanoMin = std::numeric_limits::lowest(); +const int64_t kMicroMax = 
std::floor(kNanoMax / 1000); +const int64_t kMicroMin = std::ceil(kNanoMin / 1000); +const int64_t kMilliMax = std::floor(kMicroMax / 1000); +const int64_t kMilliMin = std::ceil(kMicroMin / 1000); +const int64_t kSecondMax = std::floor(kMilliMax / 1000); +const int64_t kSecondMin = std::ceil(kMilliMin / 1000); + +static constexpr random::SeedType kRandomSeed = 0x0ff1ce; class MemoryOutputStream : public liborc::OutputStream { public: @@ -58,6 +85,189 @@ class MemoryOutputStream : public liborc::OutputStream { uint64_t length_, natural_write_size_; }; +std::shared_ptr GenerateFixedDifferenceBuffer(int32_t fixed_length, + int64_t length) { + BufferBuilder builder; + int32_t offsets[length]; + ARROW_EXPECT_OK(builder.Resize(4 * length)); + for (int32_t i = 0; i < length; i++) { + offsets[i] = fixed_length * i; + } + ARROW_EXPECT_OK(builder.Append(offsets, 4 * length)); + std::shared_ptr buffer; + ARROW_EXPECT_OK(builder.Finish(&buffer)); + return buffer; +} + +std::shared_ptr CastFixedSizeBinaryArrayToBinaryArray( + std::shared_ptr array) { + auto fixed_size_binary_array = checked_pointer_cast(array); + std::shared_ptr value_offsets = GenerateFixedDifferenceBuffer( + fixed_size_binary_array->byte_width(), array->length() + 1); + return std::make_shared(array->length(), value_offsets, + array->data()->buffers[1], + array->data()->buffers[0]); +} + +template +std::shared_ptr CastInt64ArrayToTemporalArray( + const std::shared_ptr& type, std::shared_ptr array) { + std::shared_ptr new_array_data = + ArrayData::Make(type, array->length(), array->data()->buffers); + return std::make_shared(new_array_data); +} + +Result> GenerateRandomDate64Array(int64_t size, + double null_probability) { + arrow::random::RandomArrayGenerator rand(kRandomSeed); + return CastInt64ArrayToTemporalArray( + date64(), rand.Int64(size, kMilliMin, kMilliMax, null_probability)); +} + +Result> GenerateRandomTimestampArray(int64_t size, + arrow::TimeUnit::type type, + double null_probability) { + 
arrow::random::RandomArrayGenerator rand(kRandomSeed); + switch (type) { + case arrow::TimeUnit::type::SECOND: { + return CastInt64ArrayToTemporalArray( + timestamp(TimeUnit::SECOND), + rand.Int64(size, kSecondMin, kSecondMax, null_probability)); + } + case arrow::TimeUnit::type::MILLI: { + return CastInt64ArrayToTemporalArray( + timestamp(TimeUnit::MILLI), + rand.Int64(size, kMilliMin, kMilliMax, null_probability)); + } + case arrow::TimeUnit::type::MICRO: { + return CastInt64ArrayToTemporalArray( + timestamp(TimeUnit::MICRO), + rand.Int64(size, kMicroMin, kMicroMax, null_probability)); + } + case arrow::TimeUnit::type::NANO: { + return CastInt64ArrayToTemporalArray( + timestamp(TimeUnit::NANO), + rand.Int64(size, kNanoMin, kNanoMax, null_probability)); + } + default: { + return arrow::Status::TypeError("Unknown or unsupported Arrow TimeUnit: ", type); + } + } +} + +/// \brief Construct a random weak composition of a nonnegative integer +/// i.e. a way of writing it as the sum of a sequence of n non-negative +/// integers. 
+/// +/// \param[in] n the number of integers in the weak composition +/// \param[in] sum the integer of which a random weak composition is generated +/// \param[out] out The generated weak composition +template +void RandWeakComposition(int64_t n, T sum, std::vector* out) { + const int random_seed = 0; + std::default_random_engine gen(random_seed); + out->resize(n, static_cast(0)); + T remaining_sum = sum; + std::generate(out->begin(), out->end() - 1, [&gen, &remaining_sum] { + std::uniform_int_distribution d(static_cast(0), remaining_sum); + auto res = d(gen); + remaining_sum -= res; + return static_cast(res); + }); + (*out)[n - 1] += remaining_sum; + std::random_shuffle(out->begin(), out->end()); +} + +std::shared_ptr GenerateRandomChunkedArray( + const std::shared_ptr& data_type, int64_t size, int64_t min_num_chunks, + int64_t max_num_chunks, double null_probability) { + arrow::random::RandomArrayGenerator rand(kRandomSeed); + std::vector num_chunks(1, 0); + std::vector current_size_chunks; + arrow::randint(1, min_num_chunks, max_num_chunks, &num_chunks); + int64_t current_num_chunks = num_chunks[0]; + ArrayVector arrays(current_num_chunks, nullptr); + arrow::RandWeakComposition(current_num_chunks, size, ¤t_size_chunks); + for (int j = 0; j < current_num_chunks; j++) { + switch (data_type->id()) { + case arrow::Type::type::DATE64: { + EXPECT_OK_AND_ASSIGN(arrays[j], GenerateRandomDate64Array(current_size_chunks[j], + null_probability)); + break; + } + case arrow::Type::type::TIMESTAMP: { + EXPECT_OK_AND_ASSIGN( + arrays[j], + GenerateRandomTimestampArray( + current_size_chunks[j], + arrow::internal::checked_pointer_cast(data_type) + ->unit(), + null_probability)); + break; + } + default: + arrays[j] = rand.ArrayOf(data_type, current_size_chunks[j], null_probability); + } + } + return std::make_shared(arrays); +} + +std::shared_ptr GenerateRandomTable(const std::shared_ptr& schema, + int64_t size, int64_t min_num_chunks, + int64_t max_num_chunks, + double 
null_probability) { + int num_cols = schema->num_fields(); + ChunkedArrayVector cv; + for (int col = 0; col < num_cols; col++) { + cv.push_back(GenerateRandomChunkedArray(schema->field(col)->type(), size, + min_num_chunks, max_num_chunks, + null_probability)); + } + return Table::Make(schema, cv); +} + +void AssertTableWriteReadEqual(const std::shared_ptr
& input_table, + const std::shared_ptr
& expected_output_table, + const int64_t max_size = kDefaultSmallMemStreamSize) { + EXPECT_OK_AND_ASSIGN(auto buffer_output_stream, + io::BufferOutputStream::Create(max_size)); + EXPECT_OK_AND_ASSIGN(auto writer, + adapters::orc::ORCFileWriter::Open(buffer_output_stream.get())); + ARROW_EXPECT_OK(writer->Write(*input_table)); + ARROW_EXPECT_OK(writer->Close()); + EXPECT_OK_AND_ASSIGN(auto buffer, buffer_output_stream->Finish()); + std::shared_ptr in_stream(new io::BufferReader(buffer)); + std::unique_ptr reader; + ARROW_EXPECT_OK( + adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), &reader)); + std::shared_ptr
actual_output_table; + ARROW_EXPECT_OK(reader->Read(&actual_output_table)); + AssertTablesEqual(*expected_output_table, *actual_output_table, false, false); +} +void AssertArrayWriteReadEqual(const std::shared_ptr& input_array, + const std::shared_ptr& expected_output_array, + const int64_t max_size = kDefaultSmallMemStreamSize) { + std::shared_ptr input_schema = schema({field("col0", input_array->type())}), + output_schema = + schema({field("col0", expected_output_array->type())}); + auto input_chunked_array = std::make_shared(input_array), + expected_output_chunked_array = + std::make_shared(expected_output_array); + std::shared_ptr
input_table = Table::Make(input_schema, {input_chunked_array}), + expected_output_table = + Table::Make(output_schema, {expected_output_chunked_array}); + AssertTableWriteReadEqual(input_table, expected_output_table, max_size); +} + +void SchemaORCWriteReadTest(const std::shared_ptr& schema, int64_t size, + int64_t min_num_chunks, int64_t max_num_chunks, + double null_probability, + int64_t max_size = kDefaultSmallMemStreamSize) { + const std::shared_ptr
table = + GenerateRandomTable(schema, size, min_num_chunks, max_num_chunks, null_probability); + AssertTableWriteReadEqual(table, table, max_size); +} + std::unique_ptr CreateWriter(uint64_t stripe_size, const liborc::Type& type, liborc::OutputStream* stream) { @@ -69,32 +279,34 @@ std::unique_ptr CreateWriter(uint64_t stripe_size, return liborc::createWriter(type, stream, options); } -TEST(TestAdapter, readIntAndStringFileMultipleStripes) { - MemoryOutputStream mem_stream(DEFAULT_MEM_STREAM_SIZE); +TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) { + MemoryOutputStream mem_stream(kDefaultMemStreamSize); ORC_UNIQUE_PTR type( liborc::Type::buildTypeFromString("struct")); constexpr uint64_t stripe_size = 1024; // 1K constexpr uint64_t stripe_count = 10; - constexpr uint64_t stripe_row_count = 65535; + constexpr uint64_t stripe_row_count = 16384; constexpr uint64_t reader_batch_size = 1024; auto writer = CreateWriter(stripe_size, *type, &mem_stream); auto batch = writer->createRowBatch(stripe_row_count); - auto struct_batch = dynamic_cast(batch.get()); - auto long_batch = dynamic_cast(struct_batch->fields[0]); - auto str_batch = dynamic_cast(struct_batch->fields[1]); + auto struct_batch = internal::checked_cast(batch.get()); + auto long_batch = + internal::checked_cast(struct_batch->fields[0]); + auto str_batch = + internal::checked_cast(struct_batch->fields[1]); int64_t accumulated = 0; for (uint64_t j = 0; j < stripe_count; ++j) { - char data_buffer[327675]; + std::string data_buffer(stripe_row_count * 5, '\0'); uint64_t offset = 0; for (uint64_t i = 0; i < stripe_row_count; ++i) { std::string str_data = std::to_string(accumulated % stripe_row_count); long_batch->data[i] = static_cast(accumulated % stripe_row_count); - str_batch->data[i] = data_buffer + offset; + str_batch->data[i] = &data_buffer[offset]; str_batch->length[i] = static_cast(str_data.size()); - memcpy(data_buffer + offset, str_data.c_str(), str_data.size()); + memcpy(&data_buffer[offset], 
str_data.c_str(), str_data.size()); accumulated++; offset += str_data.size(); } @@ -115,6 +327,10 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) { ASSERT_TRUE( adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), &reader).ok()); + EXPECT_OK_AND_ASSIGN(auto metadata, reader->ReadMetadata()); + auto expected_metadata = std::const_pointer_cast( + key_value_metadata(std::vector(), std::vector())); + ASSERT_TRUE(metadata->Equals(*expected_metadata)); ASSERT_EQ(stripe_row_count * stripe_count, reader->NumberOfRows()); ASSERT_EQ(stripe_count, reader->NumberOfStripes()); accumulated = 0; @@ -124,8 +340,8 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) { std::shared_ptr record_batch; EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok()); while (record_batch) { - auto int32_array = std::dynamic_pointer_cast(record_batch->column(0)); - auto str_array = std::dynamic_pointer_cast(record_batch->column(1)); + auto int32_array = checked_pointer_cast(record_batch->column(0)); + auto str_array = checked_pointer_cast(record_batch->column(1)); for (int j = 0; j < record_batch->num_rows(); ++j) { EXPECT_EQ(accumulated % stripe_row_count, int32_array->Value(j)); EXPECT_EQ(std::to_string(accumulated % stripe_row_count), @@ -157,4 +373,317 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) { EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok()); } } + +// WriteORC tests +// Trivial + +class TestORCWriterTrivialNoConversion : public ::testing::Test { + public: + TestORCWriterTrivialNoConversion() { + table_schema = schema( + {field("bool", boolean()), field("int8", int8()), field("int16", int16()), + field("int32", int32()), field("int64", int64()), field("float", float32()), + field("double", float64()), field("decimal128nz", decimal128(25, 6)), + field("decimal128z", decimal128(32, 0)), field("date32", date32()), + field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()), + field("binary", binary()), + field("struct", 
struct_({field("a", utf8()), field("b", int64())})), + field("list", list(int32())), + field("lsl", list(struct_({field("lsl0", list(int32()))}))), + field("map", map(utf8(), utf8()))}); + } + + protected: + std::shared_ptr table_schema; +}; +TEST_F(TestORCWriterTrivialNoConversion, writeTrivialChunk) { + std::shared_ptr
table = TableFromJSON(table_schema, {R"([])"}); + AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16); +} +TEST_F(TestORCWriterTrivialNoConversion, writeChunkless) { + std::shared_ptr
table = TableFromJSON(table_schema, {}); + AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16); +} +class TestORCWriterTrivialWithConversion : public ::testing::Test { + public: + TestORCWriterTrivialWithConversion() { + input_schema = schema( + {field("date64", date64()), field("ts0", timestamp(TimeUnit::SECOND)), + field("ts1", timestamp(TimeUnit::MILLI)), + field("ts2", timestamp(TimeUnit::MICRO)), field("large_string", large_utf8()), + field("large_binary", large_binary()), + field("fixed_size_binary0", fixed_size_binary(0)), + field("fixed_size_binary", fixed_size_binary(5)), + field("large_list", large_list(int32())), + field("fixed_size_list", fixed_size_list(int32(), 3))}), + output_schema = schema( + {field("date64", timestamp(TimeUnit::NANO)), + field("ts0", timestamp(TimeUnit::NANO)), field("ts1", timestamp(TimeUnit::NANO)), + field("ts2", timestamp(TimeUnit::NANO)), field("large_string", utf8()), + field("large_binary", binary()), field("fixed_size_binary0", binary()), + field("fixed_size_binary", binary()), field("large_list", list(int32())), + field("fixed_size_list", list(int32()))}); + } + + protected: + std::shared_ptr input_schema, output_schema; +}; +TEST_F(TestORCWriterTrivialWithConversion, writeTrivialChunk) { + std::shared_ptr
input_table = TableFromJSON(input_schema, {R"([])"}), + expected_output_table = TableFromJSON(output_schema, {R"([])"}); + AssertTableWriteReadEqual(input_table, expected_output_table, + kDefaultSmallMemStreamSize / 16); +} +TEST_F(TestORCWriterTrivialWithConversion, writeChunkless) { + std::shared_ptr
input_table = TableFromJSON(input_schema, {}), + expected_output_table = TableFromJSON(output_schema, {}); + AssertTableWriteReadEqual(input_table, expected_output_table, + kDefaultSmallMemStreamSize / 16); +} + +// General + +class TestORCWriterNoConversion : public ::testing::Test { + public: + TestORCWriterNoConversion() { + table_schema = schema( + {field("bool", boolean()), field("int8", int8()), field("int16", int16()), + field("int32", int32()), field("int64", int64()), field("float", float32()), + field("double", float64()), field("date32", date32()), + field("decimal64", decimal128(18, 4)), field("decimal64z", decimal128(18, 0)), + field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()), + field("binary", binary())}); + } + + protected: + std::shared_ptr table_schema; +}; +TEST_F(TestORCWriterNoConversion, writeNoNulls) { + SchemaORCWriteReadTest(table_schema, 11203, 5, 10, 0, kDefaultSmallMemStreamSize * 5); +} +TEST_F(TestORCWriterNoConversion, writeMixed) { + SchemaORCWriteReadTest(table_schema, 9405, 1, 20, 0.6, kDefaultSmallMemStreamSize * 5); +} +TEST_F(TestORCWriterNoConversion, writeAllNulls) { + SchemaORCWriteReadTest(table_schema, 4006, 1, 5, 1); +} + +// Converts +// Since Arrow has way more types than ORC type conversions are unavoidable +class TestORCWriterWithConversion : public ::testing::Test { + public: + TestORCWriterWithConversion() { + input_schema = schema( + {field("date64", date64()), field("ts0", timestamp(TimeUnit::SECOND)), + field("ts1", timestamp(TimeUnit::MILLI)), + field("ts2", timestamp(TimeUnit::MICRO)), field("large_string", large_utf8()), + field("large_binary", large_binary()), + field("fixed_size_binary0", fixed_size_binary(0)), + field("fixed_size_binary", fixed_size_binary(5))}), + output_schema = schema( + {field("date64", timestamp(TimeUnit::NANO)), + field("ts0", timestamp(TimeUnit::NANO)), field("ts1", timestamp(TimeUnit::NANO)), + field("ts2", timestamp(TimeUnit::NANO)), field("large_string", utf8()), + 
field("large_binary", binary()), field("fixed_size_binary0", binary()), + field("fixed_size_binary", binary())}); + } + void RunTest(int64_t num_rows, double null_possibility, + int64_t max_size = kDefaultSmallMemStreamSize) { + int64_t num_cols = (input_schema->fields()).size(); + std::shared_ptr
input_table = + GenerateRandomTable(input_schema, num_rows, 1, 1, null_possibility); + ArrayVector av(num_cols); + for (int i = 0; i < num_cols - 2; i++) { + EXPECT_OK_AND_ASSIGN(av[i], + arrow::compute::Cast(*(input_table->column(i)->chunk(0)), + output_schema->field(i)->type())); + } + for (int i = num_cols - 2; i < num_cols; i++) { + av[i] = CastFixedSizeBinaryArrayToBinaryArray(input_table->column(i)->chunk(0)); + } + std::shared_ptr
expected_output_table = Table::Make(output_schema, av); + AssertTableWriteReadEqual(input_table, expected_output_table, max_size); + } + + protected: + std::shared_ptr input_schema, output_schema; +}; +TEST_F(TestORCWriterWithConversion, writeAllNulls) { RunTest(12000, 1); } +TEST_F(TestORCWriterWithConversion, writeNoNulls) { RunTest(10009, 0); } +TEST_F(TestORCWriterWithConversion, writeMixed) { RunTest(8021, 0.5); } + +class TestORCWriterSingleArray : public ::testing::Test { + public: + TestORCWriterSingleArray() : rand(kRandomSeed) {} + + protected: + arrow::random::RandomArrayGenerator rand; +}; + +// Nested types +TEST_F(TestORCWriterSingleArray, WriteStruct) { + std::vector> subfields{field("int32", boolean())}; + const int64_t num_rows = 1234; + int num_subcols = subfields.size(); + ArrayVector av0(num_subcols); + for (int i = 0; i < num_subcols; i++) { + av0[i] = rand.ArrayOf(subfields[i]->type(), num_rows, 0.4); + } + std::shared_ptr bitmap = rand.NullBitmap(num_rows, 0.5); + std::shared_ptr array = + std::make_shared(struct_(subfields), num_rows, av0, bitmap); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteStructOfStruct) { + std::vector> subsubfields{ + field("bool", boolean()), + field("int8", int8()), + field("int16", int16()), + field("int32", int32()), + field("int64", int64()), + field("date32", date32()), + field("ts3", timestamp(TimeUnit::NANO)), + field("string", utf8()), + field("binary", binary())}; + const int64_t num_rows = 1234; + int num_subsubcols = subsubfields.size(); + ArrayVector av00(num_subsubcols), av0(1); + for (int i = 0; i < num_subsubcols; i++) { + av00[i] = rand.ArrayOf(subsubfields[i]->type(), num_rows, 0); + } + std::shared_ptr bitmap0 = rand.NullBitmap(num_rows, 0); + av0[0] = std::make_shared(struct_(subsubfields), num_rows, av00, bitmap0); + std::shared_ptr bitmap = rand.NullBitmap(num_rows, 0.2); + std::shared_ptr array = std::make_shared( + 
struct_({field("struct2", struct_(subsubfields))}), num_rows, av0, bitmap); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteList) { + const int64_t num_rows = 1234; + auto value_array = rand.ArrayOf(int32(), 125 * num_rows, 0); + std::shared_ptr array = rand.List(*value_array, num_rows, 1); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 100); +} + +TEST_F(TestORCWriterSingleArray, WriteLargeList) { + const int64_t num_rows = 1234; + auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.5); + auto output_offsets = rand.Offsets(num_rows + 1, 0, 5 * num_rows, 0.6, false); + EXPECT_OK_AND_ASSIGN(auto input_offsets, + arrow::compute::Cast(*output_offsets, int64())); + EXPECT_OK_AND_ASSIGN(auto input_array, + arrow::LargeListArray::FromArrays(*input_offsets, *value_array)); + EXPECT_OK_AND_ASSIGN(auto output_array, + arrow::ListArray::FromArrays(*output_offsets, *value_array)); + AssertArrayWriteReadEqual(input_array, output_array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteFixedSizeList) { + const int64_t num_rows = 1234; + std::shared_ptr value_array = rand.ArrayOf(int32(), 3 * num_rows, 0.8); + std::shared_ptr bitmap = rand.NullBitmap(num_rows, 1); + std::shared_ptr buffer = GenerateFixedDifferenceBuffer(3, num_rows + 1); + std::shared_ptr input_array = std::make_shared( + fixed_size_list(int32(), 3), num_rows, value_array, bitmap), + output_array = std::make_shared( + list(int32()), num_rows, buffer, value_array, bitmap); + AssertArrayWriteReadEqual(input_array, output_array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteListOfList) { + const int64_t num_rows = 1234; + auto value_value_array = rand.ArrayOf(utf8(), 4 * num_rows, 0.5); + std::shared_ptr value_array = rand.List(*value_value_array, 2 * num_rows, 0.7); + std::shared_ptr array = rand.List(*value_array, num_rows, 0.4); + 
AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteListOfListOfList) { + const int64_t num_rows = 1234; + auto value3_array = rand.ArrayOf(int64(), 12 * num_rows, 0.1); + std::shared_ptr value2_array = rand.List(*value3_array, 5 * num_rows, 0); + std::shared_ptr value_array = rand.List(*value2_array, 2 * num_rows, 0.1); + std::shared_ptr array = rand.List(*value_array, num_rows, 0.1); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 35); +} + +TEST_F(TestORCWriterSingleArray, WriteListOfStruct) { + const int64_t num_rows = 1234, num_values = 3 * num_rows; + ArrayVector av00(1); + av00[0] = rand.ArrayOf(int32(), num_values, 0); + std::shared_ptr bitmap = rand.NullBitmap(num_values, 0.2); + std::shared_ptr value_array = std::make_shared( + struct_({field("a", int32())}), num_values, av00, bitmap); + std::shared_ptr array = rand.List(*value_array, num_rows, 0); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 30); +} + +TEST_F(TestORCWriterSingleArray, WriteStructOfList) { + const int64_t num_rows = 1234; + ArrayVector av0(1); + auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.2); + av0[0] = rand.List(*value_array, num_rows, 0); + std::shared_ptr bitmap = rand.NullBitmap(num_rows, 0.2); + std::shared_ptr array = std::make_shared( + struct_({field("a", list(int32()))}), num_rows, av0, bitmap); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 20); +} + +TEST_F(TestORCWriterSingleArray, WriteMap) { + const int64_t num_rows = 1234; + auto key_array = rand.ArrayOf(int32(), 20 * num_rows, 0); + auto item_array = rand.ArrayOf(int32(), 20 * num_rows, 1); + std::shared_ptr array = rand.Map(key_array, item_array, num_rows, 0.1); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 50); +} + +TEST_F(TestORCWriterSingleArray, WriteStructOfMap) { + const int64_t num_rows = 1234, num_values = 5 * num_rows; + ArrayVector 
av0(1); + auto key_array = rand.ArrayOf(binary(), num_values, 0); + auto item_array = rand.ArrayOf(int32(), num_values, 0.5); + av0[0] = rand.Map(key_array, item_array, num_rows, 0.2); + std::shared_ptr array = std::make_shared( + struct_({field("a", map(binary(), int32()))}), num_rows, av0); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 20); +} + +TEST_F(TestORCWriterSingleArray, WriteMapOfStruct) { + const int64_t num_rows = 1234, num_values = 10 * num_rows; + std::shared_ptr key_array = rand.ArrayOf(utf8(), num_values, 0); + ArrayVector av00(1); + av00[0] = rand.ArrayOf(int32(), num_values, 0.1); + std::shared_ptr bitmap = rand.NullBitmap(num_values, 0.2); + std::shared_ptr item_array = std::make_shared( + struct_({field("a", int32())}), num_values, av00, bitmap); + std::shared_ptr array = rand.Map(key_array, item_array, num_rows, 0.1); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteMapOfMap) { + const int64_t num_rows = 1234; + auto key_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0); + auto key_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.5); + std::shared_ptr key_array = + rand.Map(key_key_array, key_item_array, 2 * num_rows, 0); + auto item_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0); + auto item_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.2); + std::shared_ptr item_array = + rand.Map(item_key_array, item_item_array, 2 * num_rows, 0.3); + std::shared_ptr array = rand.Map(key_array, item_array, num_rows, 0.4); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + +TEST_F(TestORCWriterSingleArray, WriteListOfMap) { + const int64_t num_rows = 1234; + auto value_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0); + auto value_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.5); + std::shared_ptr value_array = + rand.Map(value_key_array, value_item_array, 2 * num_rows, 0.2); + std::shared_ptr array = 
rand.List(*value_array, num_rows, 0.4); + AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10); +} + } // namespace arrow diff --git a/cpp/src/arrow/adapters/orc/adapter_util.cc b/cpp/src/arrow/adapters/orc/adapter_util.cc index 5a36e2c0100..f956a6f6217 100644 --- a/cpp/src/arrow/adapters/orc/adapter_util.cc +++ b/cpp/src/arrow/adapters/orc/adapter_util.cc @@ -15,18 +15,25 @@ // specific language governing permissions and limitations // under the License. +#include "arrow/adapters/orc/adapter_util.h" + +#include #include #include -#include "arrow/adapters/orc/adapter_util.h" #include "arrow/array/builder_base.h" #include "arrow/builder.h" +#include "arrow/chunked_array.h" +#include "arrow/scalar.h" #include "arrow/status.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/range.h" - +#include "arrow/util/string_view.h" +#include "arrow/visitor_inline.h" #include "orc/Exceptions.hh" +#include "orc/MemoryPool.hh" #include "orc/OrcFile.hh" // alias to not interfere with nested orc namespace @@ -34,19 +41,25 @@ namespace liborc = orc; namespace arrow { -namespace adapters { +using internal::checked_cast; +namespace adapters { namespace orc { -using internal::checked_cast; +namespace { -// The number of nanoseconds in a second +// The number of milliseconds, microseconds and nanoseconds in a second +constexpr int64_t kOneSecondMillis = 1000LL; +constexpr int64_t kOneMicroNanos = 1000LL; +constexpr int64_t kOneSecondMicros = 1000000LL; +constexpr int64_t kOneMilliNanos = 1000000LL; constexpr int64_t kOneSecondNanos = 1000000000LL; -Status AppendStructBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch, - int64_t offset, int64_t length, ArrayBuilder* abuilder) { +Status AppendStructBatch(const liborc::Type* type, + liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, + int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - 
auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); const uint8_t* valid_bytes = nullptr; if (batch->hasNulls) { @@ -61,10 +74,11 @@ Status AppendStructBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cb return Status::OK(); } -Status AppendListBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch, - int64_t offset, int64_t length, ArrayBuilder* abuilder) { +Status AppendListBatch(const liborc::Type* type, + liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, + int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); liborc::ColumnVectorBatch* elements = batch->elements.get(); const liborc::Type* elemtype = type->getSubtype(0); @@ -83,37 +97,38 @@ Status AppendListBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbat return Status::OK(); } -Status AppendMapBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch, - int64_t offset, int64_t length, ArrayBuilder* abuilder) { - auto list_builder = checked_cast(abuilder); - auto struct_builder = checked_cast(list_builder->value_builder()); - auto batch = checked_cast(cbatch); +Status AppendMapBatch(const liborc::Type* type, + liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, + int64_t length, ArrayBuilder* abuilder) { + auto builder = checked_cast(abuilder); + auto batch = checked_cast(column_vector_batch); liborc::ColumnVectorBatch* keys = batch->keys.get(); - liborc::ColumnVectorBatch* vals = batch->elements.get(); - const liborc::Type* keytype = type->getSubtype(0); - const liborc::Type* valtype = type->getSubtype(1); + liborc::ColumnVectorBatch* items = batch->elements.get(); + const liborc::Type* key_type = type->getSubtype(0); + const liborc::Type* item_type = type->getSubtype(1); const bool has_nulls = batch->hasNulls; for (int64_t i = offset; i < length + offset; i++) { - 
RETURN_NOT_OK(list_builder->Append()); - int64_t start = batch->offsets[i]; - int64_t list_length = batch->offsets[i + 1] - start; - if (list_length && (!has_nulls || batch->notNull[i])) { - RETURN_NOT_OK(struct_builder->AppendValues(list_length, nullptr)); - RETURN_NOT_OK(AppendBatch(keytype, keys, start, list_length, - struct_builder->field_builder(0))); - RETURN_NOT_OK(AppendBatch(valtype, vals, start, list_length, - struct_builder->field_builder(1))); + if (!has_nulls || batch->notNull[i]) { + int64_t start = batch->offsets[i]; + int64_t end = batch->offsets[i + 1]; + RETURN_NOT_OK(builder->Append()); + RETURN_NOT_OK( + AppendBatch(key_type, keys, start, end - start, builder->key_builder())); + RETURN_NOT_OK( + AppendBatch(item_type, items, start, end - start, builder->item_builder())); + } else { + RETURN_NOT_OK(builder->AppendNull()); } } return Status::OK(); } -template -Status AppendNumericBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, +template +Status AppendNumericBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, int64_t length, ArrayBuilder* abuilder) { - auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto builder = checked_cast(abuilder); + auto batch = checked_cast(column_vector_batch); if (length == 0) { return Status::OK(); @@ -122,16 +137,16 @@ Status AppendNumericBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, if (batch->hasNulls) { valid_bytes = reinterpret_cast(batch->notNull.data()) + offset; } - const elem_type* source = batch->data.data() + offset; + const ElemType* source = batch->data.data() + offset; RETURN_NOT_OK(builder->AppendValues(source, length, valid_bytes)); return Status::OK(); } -template -Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset, - int64_t length, ArrayBuilder* abuilder) { - auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); +template +Status AppendNumericBatchCast(liborc::ColumnVectorBatch* 
column_vector_batch, + int64_t offset, int64_t length, ArrayBuilder* abuilder) { + auto builder = checked_cast(abuilder); + auto batch = checked_cast(column_vector_batch); if (length == 0) { return Status::OK(); @@ -141,9 +156,9 @@ Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset, if (batch->hasNulls) { valid_bytes = reinterpret_cast(batch->notNull.data()) + offset; } - const source_type* source = batch->data.data() + offset; + const SourceType* source = batch->data.data() + offset; auto cast_iter = internal::MakeLazyRange( - [&source](int64_t index) { return static_cast(source[index]); }, + [&source](int64_t index) { return static_cast(source[index]); }, length); RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes)); @@ -151,10 +166,10 @@ Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset, return Status::OK(); } -Status AppendBoolBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, int64_t length, - ArrayBuilder* abuilder) { +Status AppendBoolBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, + int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); if (length == 0) { return Status::OK(); @@ -174,10 +189,10 @@ Status AppendBoolBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, int64_ return Status::OK(); } -Status AppendTimestampBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, - int64_t length, ArrayBuilder* abuilder) { +Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch, + int64_t offset, int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); if (length == 0) { return Status::OK(); @@ -202,11 +217,11 @@ Status AppendTimestampBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, return 
Status::OK(); } -template -Status AppendBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, +template +Status AppendBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, int64_t length, ArrayBuilder* abuilder) { - auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto builder = checked_cast(abuilder); + auto batch = checked_cast(column_vector_batch); const bool has_nulls = batch->hasNulls; for (int64_t i = offset; i < length + offset; i++) { @@ -220,10 +235,10 @@ Status AppendBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, return Status::OK(); } -Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, - int64_t length, ArrayBuilder* abuilder) { +Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch, + int64_t offset, int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); const bool has_nulls = batch->hasNulls; for (int64_t i = offset; i < length + offset; i++) { @@ -236,13 +251,14 @@ Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, return Status::OK(); } -Status AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch, - int64_t offset, int64_t length, ArrayBuilder* abuilder) { +Status AppendDecimalBatch(const liborc::Type* type, + liborc::ColumnVectorBatch* column_vector_batch, int64_t offset, + int64_t length, ArrayBuilder* abuilder) { auto builder = checked_cast(abuilder); - const bool has_nulls = cbatch->hasNulls; + const bool has_nulls = column_vector_batch->hasNulls; if (type->getPrecision() == 0 || type->getPrecision() > 18) { - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); for (int64_t i = offset; i < length + offset; i++) { if (!has_nulls || batch->notNull[i]) { RETURN_NOT_OK(builder->Append( @@ -252,7 +268,7 @@ Status 
AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* c } } } else { - auto batch = checked_cast(cbatch); + auto batch = checked_cast(column_vector_batch); for (int64_t i = offset; i < length + offset; i++) { if (!has_nulls || batch->notNull[i]) { RETURN_NOT_OK(builder->Append(Decimal128(batch->values[i]))); @@ -264,6 +280,8 @@ Status AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* c return Status::OK(); } +} // namespace + Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch, int64_t offset, int64_t length, ArrayBuilder* builder) { if (type == nullptr) { @@ -316,6 +334,615 @@ Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch, } } +namespace { + +using internal::checked_cast; +using internal::checked_pointer_cast; + +Status WriteBatch(const Array& parray, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch); + +// Make sure children of StructArray have appropriate null. +Result> NormalizeArray(const std::shared_ptr& array) { + Type::type kind = array->type_id(); + switch (kind) { + case Type::type::STRUCT: { + if (array->null_count() == 0) { + return array; + } else { + auto struct_array = checked_pointer_cast(array); + const std::shared_ptr bitmap = struct_array->null_bitmap(); + std::shared_ptr struct_type = struct_array->type(); + std::size_t size = struct_type->fields().size(); + std::vector> new_children(size, nullptr); + for (std::size_t i = 0; i < size; i++) { + std::shared_ptr child = struct_array->field(i); + const std::shared_ptr child_bitmap = child->null_bitmap(); + std::shared_ptr final_child_bitmap; + if (child_bitmap == nullptr) { + final_child_bitmap = bitmap; + } else { + ARROW_ASSIGN_OR_RAISE( + final_child_bitmap, + internal::BitmapAnd(default_memory_pool(), bitmap->data(), 0, + child_bitmap->data(), 0, struct_array->length(), 0)); + } + std::shared_ptr child_array_data = child->data(); + std::vector> child_buffers = 
child_array_data->buffers; + child_buffers[0] = final_child_bitmap; + std::shared_ptr new_child_array_data = + ArrayData::Make(child->type(), child->length(), child_buffers, + child_array_data->child_data, child_array_data->dictionary); + ARROW_ASSIGN_OR_RAISE(new_children[i], + NormalizeArray(MakeArray(new_child_array_data))); + } + return std::make_shared(struct_type, struct_array->length(), + new_children, bitmap); + } + } + case Type::type::LIST: { + auto list_array = checked_pointer_cast(array); + ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values())); + return std::make_shared(list_array->type(), list_array->length(), + list_array->value_offsets(), value_array, + list_array->null_bitmap()); + } + case Type::type::LARGE_LIST: { + auto list_array = checked_pointer_cast(array); + ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values())); + return std::make_shared(list_array->type(), list_array->length(), + list_array->value_offsets(), value_array, + list_array->null_bitmap()); + } + case Type::type::FIXED_SIZE_LIST: { + auto list_array = checked_pointer_cast(array); + ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values())); + return std::make_shared(list_array->type(), + list_array->length(), value_array, + list_array->null_bitmap()); + } + case Type::type::MAP: { + auto map_array = checked_pointer_cast(array); + ARROW_ASSIGN_OR_RAISE(auto key_array, NormalizeArray(map_array->keys())); + ARROW_ASSIGN_OR_RAISE(auto item_array, NormalizeArray(map_array->items())); + return std::make_shared(map_array->type(), map_array->length(), + map_array->value_offsets(), key_array, item_array, + map_array->null_bitmap()); + } + default: { + return array; + } + } +} + +template +struct Appender {}; + +// Types for long/double-like Appender, that is, numeric, boolean or date32 +template +using is_generic_type = + std::integral_constant::value || + std::is_same::value || + is_boolean_type::value>; +template +using 
enable_if_generic = enable_if_t::value, R>; + +// Number-like +template +struct Appender> { + using ArrayType = typename TypeTraits::ArrayType; + using ValueType = typename TypeTraits::CType; + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(ValueType v) { + batch->data[running_orc_offset] = array.Value(running_arrow_offset); + batch->notNull[running_orc_offset] = true; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const ArrayType& array; + BatchType* batch; + int64_t running_orc_offset, running_arrow_offset; +}; + +// Binary +template +struct Appender { + using ArrayType = typename TypeTraits::ArrayType; + using COffsetType = typename TypeTraits::OffsetType::c_type; + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(util::string_view v) { + batch->notNull[running_orc_offset] = true; + COffsetType data_length = 0; + batch->data[running_orc_offset] = reinterpret_cast( + const_cast(array.GetValue(running_arrow_offset, &data_length))); + batch->length[running_orc_offset] = data_length; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const ArrayType& array; + liborc::StringVectorBatch* batch; + int64_t running_orc_offset, running_arrow_offset; +}; + +// Decimal +template <> +struct Appender { + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(util::string_view v) { + batch->notNull[running_orc_offset] = true; + const Decimal128 dec_value(array.GetValue(running_arrow_offset)); + batch->values[running_orc_offset] = static_cast(dec_value.low_bits()); + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const Decimal128Array& array; + 
liborc::Decimal64VectorBatch* batch; + int64_t running_orc_offset, running_arrow_offset; +}; + +template <> +struct Appender { + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(util::string_view v) { + batch->notNull[running_orc_offset] = true; + const Decimal128 dec_value(array.GetValue(running_arrow_offset)); + batch->values[running_orc_offset] = + liborc::Int128(dec_value.high_bits(), dec_value.low_bits()); + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const Decimal128Array& array; + liborc::Decimal128VectorBatch* batch; + int64_t running_orc_offset, running_arrow_offset; +}; + +// Date64 and Timestamp +template +struct TimestampAppender { + using ArrayType = typename TypeTraits::ArrayType; + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(int64_t v) { + int64_t data = array.Value(running_arrow_offset); + batch->notNull[running_orc_offset] = true; + batch->data[running_orc_offset] = + static_cast(std::floor(data / conversion_factor_from_second)); + batch->nanoseconds[running_orc_offset] = + (data - conversion_factor_from_second * batch->data[running_orc_offset]) * + conversion_factor_to_nano; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const ArrayType& array; + liborc::TimestampVectorBatch* batch; + int64_t running_orc_offset, running_arrow_offset; + int64_t conversion_factor_from_second, conversion_factor_to_nano; +}; + +// FSB +struct FixedSizeBinaryAppender { + Status VisitNull() { + batch->notNull[running_orc_offset] = false; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + Status VisitValue(util::string_view v) { + batch->notNull[running_orc_offset] = true; + batch->data[running_orc_offset] = reinterpret_cast( + 
const_cast(array.GetValue(running_arrow_offset))); + batch->length[running_orc_offset] = data_length; + running_orc_offset++; + running_arrow_offset++; + return Status::OK(); + } + const FixedSizeBinaryArray& array; + liborc::StringVectorBatch* batch; + int64_t running_orc_offset, running_arrow_offset; + const int32_t data_length; +}; + +// static_cast from int64_t or double to itself shouldn't introduce overhead +// Pleae see +// https://stackoverflow.com/questions/19106826/ +// can-static-cast-to-same-type-introduce-runtime-overhead +template +Status WriteGenericBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + using ArrayType = typename TypeTraits::ArrayType; + const ArrayType& array_(checked_cast(array)); + auto batch = checked_cast(column_vector_batch); + if (array.null_count()) { + batch->hasNulls = true; + } + Appender appender{array_, batch, orc_offset, 0}; + ArrayDataVisitor visitor; + RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender)); + return Status::OK(); +} + +template +Status WriteTimestampBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch, + const int64_t& conversion_factor_from_second, + const int64_t& conversion_factor_to_nano) { + using ArrayType = typename TypeTraits::ArrayType; + const ArrayType& array_(checked_cast(array)); + auto batch = checked_cast(column_vector_batch); + if (array.null_count()) { + batch->hasNulls = true; + } + TimestampAppender appender{array_, + batch, + orc_offset, + 0, + conversion_factor_from_second, + conversion_factor_to_nano}; + ArrayDataVisitor visitor; + RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender)); + return Status::OK(); +} + +Status WriteFixedSizeBinaryBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + const FixedSizeBinaryArray& array_(checked_cast(array)); + auto batch = checked_cast(column_vector_batch); + if (array.null_count()) { + 
batch->hasNulls = true; + } + FixedSizeBinaryAppender appender{array_, batch, orc_offset, 0, array_.byte_width()}; + ArrayDataVisitor visitor; + RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender)); + return Status::OK(); +} + +Status WriteStructBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + std::shared_ptr array_ = MakeArray(array.data()); + std::shared_ptr struct_array(checked_pointer_cast(array_)); + auto batch = checked_cast(column_vector_batch); + std::size_t size = array.type()->fields().size(); + int64_t arrow_length = array.length(); + int64_t running_arrow_offset = 0, running_orc_offset = orc_offset; + // First fill fields of ColumnVectorBatch + if (array.null_count()) { + batch->hasNulls = true; + } + for (; running_arrow_offset < arrow_length; + running_orc_offset++, running_arrow_offset++) { + if (array.IsNull(running_arrow_offset)) { + batch->notNull[running_orc_offset] = false; + } else { + batch->notNull[running_orc_offset] = true; + } + } + // Fill the fields + for (std::size_t i = 0; i < size; i++) { + batch->fields[i]->resize(orc_offset + arrow_length); + RETURN_NOT_OK(WriteBatch(*(struct_array->field(i)), orc_offset, batch->fields[i])); + } + return Status::OK(); +} + +template +Status WriteListBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + const ArrayType& list_array(checked_cast(array)); + auto batch = checked_cast(column_vector_batch); + liborc::ColumnVectorBatch* element_batch = (batch->elements).get(); + int64_t arrow_length = array.length(); + int64_t running_arrow_offset = 0, running_orc_offset = orc_offset; + if (orc_offset == 0) { + batch->offsets[0] = 0; + } + if (array.null_count()) { + batch->hasNulls = true; + } + for (; running_arrow_offset < arrow_length; + running_orc_offset++, running_arrow_offset++) { + if (array.IsNull(running_arrow_offset)) { + batch->notNull[running_orc_offset] = false; + 
batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset]; + } else { + batch->notNull[running_orc_offset] = true; + batch->offsets[running_orc_offset + 1] = + batch->offsets[running_orc_offset] + + list_array.value_offset(running_arrow_offset + 1) - + list_array.value_offset(running_arrow_offset); + element_batch->resize(batch->offsets[running_orc_offset + 1]); + int64_t subarray_arrow_offset = list_array.value_offset(running_arrow_offset), + subarray_orc_offset = batch->offsets[running_orc_offset], + subarray_orc_length = + batch->offsets[running_orc_offset + 1] - subarray_orc_offset; + RETURN_NOT_OK(WriteBatch( + *(list_array.values()->Slice(subarray_arrow_offset, subarray_orc_length)), + subarray_orc_offset, element_batch)); + } + } + return Status::OK(); +} + +Status WriteMapBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + const MapArray& map_array(checked_cast(array)); + auto batch = checked_cast(column_vector_batch); + liborc::ColumnVectorBatch* key_batch = (batch->keys).get(); + liborc::ColumnVectorBatch* element_batch = (batch->elements).get(); + std::shared_ptr key_array = map_array.keys(); + std::shared_ptr element_array = map_array.items(); + int64_t arrow_length = array.length(); + int64_t running_arrow_offset = 0, running_orc_offset = orc_offset; + if (orc_offset == 0) { + batch->offsets[0] = 0; + } + if (array.null_count()) { + batch->hasNulls = true; + } + for (; running_arrow_offset < arrow_length; + running_orc_offset++, running_arrow_offset++) { + if (array.IsNull(running_arrow_offset)) { + batch->notNull[running_orc_offset] = false; + batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset]; + } else { + batch->notNull[running_orc_offset] = true; + batch->offsets[running_orc_offset + 1] = + batch->offsets[running_orc_offset] + + map_array.value_offset(running_arrow_offset + 1) - + map_array.value_offset(running_arrow_offset); + int64_t subarray_arrow_offset = 
map_array.value_offset(running_arrow_offset), + subarray_orc_offset = batch->offsets[running_orc_offset], + new_subarray_orc_offset = batch->offsets[running_orc_offset + 1], + subarray_orc_length = new_subarray_orc_offset - subarray_orc_offset; + key_batch->resize(new_subarray_orc_offset); + element_batch->resize(new_subarray_orc_offset); + RETURN_NOT_OK( + WriteBatch(*(key_array->Slice(subarray_arrow_offset, subarray_orc_length)), + subarray_orc_offset, key_batch)); + RETURN_NOT_OK( + WriteBatch(*(element_array->Slice(subarray_arrow_offset, subarray_orc_length)), + subarray_orc_offset, element_batch)); + } + } + return Status::OK(); +} + +Status WriteBatch(const Array& array, int64_t orc_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + Type::type kind = array.type_id(); + column_vector_batch->numElements = orc_offset; + switch (kind) { + case Type::type::BOOL: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::INT8: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::INT16: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::INT32: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::INT64: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::FLOAT: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::DOUBLE: + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + case Type::type::BINARY: + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + case Type::type::LARGE_BINARY: + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + case Type::type::STRING: + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + case Type::type::LARGE_STRING: + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + case Type::type::FIXED_SIZE_BINARY: + return 
WriteFixedSizeBinaryBatch(array, orc_offset, column_vector_batch); + case Type::type::DATE32: + return WriteGenericBatch(array, orc_offset, + column_vector_batch); + case Type::type::DATE64: + return WriteTimestampBatch(array, orc_offset, column_vector_batch, + kOneSecondMillis, kOneMilliNanos); + case Type::type::TIMESTAMP: { + switch (internal::checked_pointer_cast(array.type())->unit()) { + case TimeUnit::type::SECOND: + return WriteTimestampBatch( + array, orc_offset, column_vector_batch, 1, kOneSecondNanos); + case TimeUnit::type::MILLI: + return WriteTimestampBatch( + array, orc_offset, column_vector_batch, kOneSecondMillis, kOneMilliNanos); + case TimeUnit::type::MICRO: + return WriteTimestampBatch( + array, orc_offset, column_vector_batch, kOneSecondMicros, kOneMicroNanos); + case TimeUnit::type::NANO: + return WriteTimestampBatch( + array, orc_offset, column_vector_batch, kOneSecondNanos, 1); + default: + return Status::TypeError("Unknown or unsupported Arrow type: ", + array.type()->ToString()); + } + } + case Type::type::DECIMAL128: { + int32_t precision = checked_pointer_cast(array.type())->precision(); + if (precision > 18) { + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + } else { + return WriteGenericBatch( + array, orc_offset, column_vector_batch); + } + } + case Type::type::STRUCT: + return WriteStructBatch(array, orc_offset, column_vector_batch); + case Type::type::LIST: + return WriteListBatch(array, orc_offset, column_vector_batch); + case Type::type::LARGE_LIST: + return WriteListBatch(array, orc_offset, column_vector_batch); + case Type::type::FIXED_SIZE_LIST: + return WriteListBatch(array, orc_offset, column_vector_batch); + case Type::type::MAP: + return WriteMapBatch(array, orc_offset, column_vector_batch); + default: { + return Status::NotImplemented("Unknown or unsupported Arrow type: ", + array.type()->ToString()); + } + } + return Status::OK(); +} + +Result> GetOrcType(const DataType& type) { + Type::type kind = 
type.id(); + switch (kind) { + case Type::type::BOOL: + return liborc::createPrimitiveType(liborc::TypeKind::BOOLEAN); + case Type::type::INT8: + return liborc::createPrimitiveType(liborc::TypeKind::BYTE); + case Type::type::INT16: + return liborc::createPrimitiveType(liborc::TypeKind::SHORT); + case Type::type::INT32: + return liborc::createPrimitiveType(liborc::TypeKind::INT); + case Type::type::INT64: + return liborc::createPrimitiveType(liborc::TypeKind::LONG); + case Type::type::FLOAT: + return liborc::createPrimitiveType(liborc::TypeKind::FLOAT); + case Type::type::DOUBLE: + return liborc::createPrimitiveType(liborc::TypeKind::DOUBLE); + // Use STRING instead of VARCHAR for now, both use UTF-8 + case Type::type::STRING: + case Type::type::LARGE_STRING: + return liborc::createPrimitiveType(liborc::TypeKind::STRING); + case Type::type::BINARY: + case Type::type::LARGE_BINARY: + case Type::type::FIXED_SIZE_BINARY: + return liborc::createPrimitiveType(liborc::TypeKind::BINARY); + case Type::type::DATE32: + return liborc::createPrimitiveType(liborc::TypeKind::DATE); + case Type::type::DATE64: + case Type::type::TIMESTAMP: + return liborc::createPrimitiveType(liborc::TypeKind::TIMESTAMP); + case Type::type::DECIMAL128: { + const uint64_t precision = + static_cast(checked_cast(type).precision()); + const uint64_t scale = + static_cast(checked_cast(type).scale()); + return liborc::createDecimalType(precision, scale); + } + case Type::type::LIST: + case Type::type::FIXED_SIZE_LIST: + case Type::type::LARGE_LIST: { + std::shared_ptr arrow_child_type = + checked_cast(type).value_type(); + ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type)); + return liborc::createListType(std::move(orc_subtype)); + } + case Type::type::STRUCT: { + ORC_UNIQUE_PTR out_type = liborc::createStructType(); + std::vector> arrow_fields = + checked_cast(type).fields(); + for (std::vector>::iterator it = arrow_fields.begin(); + it != arrow_fields.end(); ++it) { + std::string 
field_name = (*it)->name(); + std::shared_ptr arrow_child_type = (*it)->type(); + ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type)); + out_type->addStructField(field_name, std::move(orc_subtype)); + } + return std::move(out_type); + } + case Type::type::MAP: { + std::shared_ptr key_arrow_type = + checked_cast(type).key_type(); + std::shared_ptr item_arrow_type = + checked_cast(type).item_type(); + ARROW_ASSIGN_OR_RAISE(auto key_orc_type, GetOrcType(*key_arrow_type)); + ARROW_ASSIGN_OR_RAISE(auto item_orc_type, GetOrcType(*item_arrow_type)); + return liborc::createMapType(std::move(key_orc_type), std::move(item_orc_type)); + } + case Type::type::DENSE_UNION: + case Type::type::SPARSE_UNION: { + ORC_UNIQUE_PTR out_type = liborc::createUnionType(); + std::vector> arrow_fields = + checked_cast(type).fields(); + for (std::vector>::iterator it = arrow_fields.begin(); + it != arrow_fields.end(); ++it) { + std::string field_name = (*it)->name(); + std::shared_ptr arrow_child_type = (*it)->type(); + ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type)); + out_type->addUnionChild(std::move(orc_subtype)); + } + return std::move(out_type); + } + default: { + return Status::NotImplemented("Unknown or unsupported Arrow type: ", + type.ToString()); + } + } +} + +} // namespace + +Status WriteBatch(const ChunkedArray& chunked_array, int64_t length, + int* arrow_chunk_offset, int64_t* arrow_index_offset, + liborc::ColumnVectorBatch* column_vector_batch) { + int num_batch = chunked_array.num_chunks(); + int64_t orc_offset = 0; + while (*arrow_chunk_offset < num_batch && orc_offset < length) { + ARROW_ASSIGN_OR_RAISE(auto array, + NormalizeArray(chunked_array.chunk(*arrow_chunk_offset))); + int64_t num_written_elements = + std::min(length - orc_offset, array->length() - *arrow_index_offset); + if (num_written_elements > 0) { + RETURN_NOT_OK(WriteBatch(*(array->Slice(*arrow_index_offset, num_written_elements)), + orc_offset, column_vector_batch)); 
+ orc_offset += num_written_elements; + *arrow_index_offset += num_written_elements; + } + if (orc_offset < length) { // Another Arrow Array done + *arrow_index_offset = 0; + (*arrow_chunk_offset)++; + } + } + column_vector_batch->numElements = orc_offset; + return Status::OK(); +} + Status GetArrowType(const liborc::Type* type, std::shared_ptr* out) { // When subselecting fields on read, liborc will set some nodes to nullptr, // so we need to check for nullptr before progressing @@ -369,15 +996,15 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr* out) { const int scale = static_cast(type->getScale()); if (precision == 0) { // In HIVE 0.11/0.12 precision is set as 0, but means max precision - *out = decimal(38, 6); + *out = decimal128(38, 6); } else { - *out = decimal(precision, scale); + *out = decimal128(precision, scale); } break; } case liborc::LIST: { if (subtype_count != 1) { - return Status::Invalid("Invalid Orc List type"); + return Status::TypeError("Invalid Orc List type"); } std::shared_ptr elemtype; RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &elemtype)); @@ -386,22 +1013,21 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr* out) { } case liborc::MAP: { if (subtype_count != 2) { - return Status::Invalid("Invalid Orc Map type"); + return Status::TypeError("Invalid Orc Map type"); } - std::shared_ptr keytype; - std::shared_ptr valtype; - RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &keytype)); - RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &valtype)); - *out = list(struct_({field("key", keytype), field("value", valtype)})); + std::shared_ptr key_type, item_type; + RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &key_type)); + RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &item_type)); + *out = map(key_type, item_type); break; } case liborc::STRUCT: { std::vector> fields; for (int child = 0; child < subtype_count; ++child) { - std::shared_ptr elemtype; - RETURN_NOT_OK(GetArrowType(type->getSubtype(child), 
&elemtype)); + std::shared_ptr elem_type; + RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type)); std::string name = type->getFieldName(child); - fields.push_back(field(name, elemtype)); + fields.push_back(field(name, elem_type)); } *out = struct_(fields); break; @@ -410,21 +1036,34 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr* out) { std::vector> fields; std::vector type_codes; for (int child = 0; child < subtype_count; ++child) { - std::shared_ptr elemtype; - RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elemtype)); - fields.push_back(field("_union_" + std::to_string(child), elemtype)); + std::shared_ptr elem_type; + RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type)); + fields.push_back(field("_union_" + std::to_string(child), elem_type)); type_codes.push_back(static_cast(child)); } *out = sparse_union(fields, type_codes); break; } default: { - return Status::Invalid("Unknown Orc type kind: ", kind); + return Status::TypeError("Unknown Orc type kind: ", type->toString()); } } return Status::OK(); } +Result> GetOrcType(const Schema& schema) { + int numFields = schema.num_fields(); + ORC_UNIQUE_PTR out_type = liborc::createStructType(); + for (int i = 0; i < numFields; i++) { + std::shared_ptr field = schema.field(i); + std::string field_name = field->name(); + std::shared_ptr arrow_child_type = field->type(); + ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type)); + out_type->addStructField(field_name, std::move(orc_subtype)); + } + return std::move(out_type); +} + } // namespace orc } // namespace adapters } // namespace arrow diff --git a/cpp/src/arrow/adapters/orc/adapter_util.h b/cpp/src/arrow/adapters/orc/adapter_util.h index 13a62f2bbd3..3e6d0fcc660 100644 --- a/cpp/src/arrow/adapters/orc/adapter_util.h +++ b/cpp/src/arrow/adapters/orc/adapter_util.h @@ -34,8 +34,24 @@ namespace orc { Status GetArrowType(const liborc::Type* type, std::shared_ptr* out); +Result> GetOrcType(const Schema& 
schema); + Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch, - int64_t offset, int64_t length, ArrayBuilder* builder); + int64_t offset, int64_t length, arrow::ArrayBuilder* builder); + +/// \brief Write a chunked array to an orc::ColumnVectorBatch +/// +/// \param[in] chunked_array the chunked array +/// \param[in] length the orc::ColumnVectorBatch size limit +/// \param[in,out] arrow_chunk_offset The current chunk being processed +/// \param[in,out] arrow_index_offset The index of the arrow_chunk_offset array +/// before or after a process +/// \param[in,out] column_vector_batch the orc::ColumnVectorBatch to be filled +/// \return Status +Status WriteBatch(const ChunkedArray& chunked_array, int64_t length, + int* arrow_chunk_offset, int64_t* arrow_index_offset, + liborc::ColumnVectorBatch* column_vector_batch); + } // namespace orc } // namespace adapters } // namespace arrow diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index 67c5ca84e1f..dad689d3ca7 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -103,28 +103,30 @@ struct ScalarFromArraySlotImpl { } Status Visit(const SparseUnionArray& a) { + const auto type_code = a.type_code(index_); // child array which stores the actual value - auto arr = a.field(a.child_id(index_)); + const auto arr = a.field(a.child_id(index_)); // no need to adjust the index ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_)); if (value->is_valid) { - out_ = std::shared_ptr(new SparseUnionScalar(value, a.type())); + out_ = std::shared_ptr(new SparseUnionScalar(value, type_code, a.type())); } else { - out_ = MakeNullScalar(a.type()); + out_ = std::shared_ptr(new SparseUnionScalar(type_code, a.type())); } return Status::OK(); } Status Visit(const DenseUnionArray& a) { + const auto type_code = a.type_code(index_); // child array which stores the actual value auto arr = a.field(a.child_id(index_)); // need to look up the value 
based on offsets auto offset = a.value_offset(index_); ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset)); if (value->is_valid) { - out_ = std::shared_ptr(new DenseUnionScalar(value, a.type())); + out_ = std::shared_ptr(new DenseUnionScalar(value, type_code, a.type())); } else { - out_ = MakeNullScalar(a.type()); + out_ = std::shared_ptr(new DenseUnionScalar(type_code, a.type())); } return Status::OK(); } diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index e29db00cfcf..2add572e7a4 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -56,15 +56,17 @@ class ARROW_EXPORT Array { /// \brief Return true if value at index is null. Does not boundscheck bool IsNull(int64_t i) const { - return null_bitmap_data_ != NULLPTR && - !BitUtil::GetBit(null_bitmap_data_, i + data_->offset); + return null_bitmap_data_ != NULLPTR + ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset) + : data_->null_count == data_->length; } /// \brief Return true if value at index is valid (not null). Does not /// boundscheck bool IsValid(int64_t i) const { - return null_bitmap_data_ == NULLPTR || - BitUtil::GetBit(null_bitmap_data_, i + data_->offset); + return null_bitmap_data_ != NULLPTR + ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset) + : data_->null_count != data_->length; } /// \brief Return a Scalar containing the value of this array at i diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h index db3c640b9a4..f8e8c4f8a44 100644 --- a/cpp/src/arrow/array/array_binary.h +++ b/cpp/src/arrow/array/array_binary.h @@ -71,6 +71,13 @@ class BaseBinaryArray : public FlatArray { raw_value_offsets_[i + 1] - pos); } + /// \brief Get binary value as a string_view + /// Provided for consistency with other arrays. 
+ /// + /// \param i the value index + /// \return the view over the selected value + util::string_view Value(int64_t i) const { return GetView(i); } + /// \brief Get binary value as a std::string /// /// \param i the value index diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc index 5c247a6dc66..e593cf7e6c4 100644 --- a/cpp/src/arrow/array/array_binary_test.cc +++ b/cpp/src/arrow/array/array_binary_test.cc @@ -473,6 +473,70 @@ class TestStringBuilder : public TestBuilder { CheckStringArray(*result_, strings, is_valid, reps); } + void TestExtendCurrent() { + std::vector strings = {"", "bbbb", "aaaaa", "", "ccc"}; + std::vector is_valid = {1, 1, 1, 0, 1}; + + int N = static_cast(strings.size()); + int reps = 10; + + for (int j = 0; j < reps; ++j) { + for (int i = 0; i < N; ++i) { + if (!is_valid[i]) { + ASSERT_OK(builder_->AppendNull()); + } else if (strings[i].length() > 3) { + ASSERT_OK(builder_->Append(strings[i].substr(0, 3))); + ASSERT_OK(builder_->ExtendCurrent(strings[i].substr(3))); + } else { + ASSERT_OK(builder_->Append(strings[i])); + } + } + } + Done(); + + ASSERT_EQ(reps * N, result_->length()); + ASSERT_EQ(reps, result_->null_count()); + ASSERT_EQ(reps * 12, result_->value_data()->size()); + + CheckStringArray(*result_, strings, is_valid, reps); + } + + void TestExtendCurrentUnsafe() { + std::vector strings = {"", "bbbb", "aaaaa", "", "ccc"}; + std::vector is_valid = {1, 1, 1, 0, 1}; + + int N = static_cast(strings.size()); + int reps = 13; + int64_t total_length = 0; + for (const auto& s : strings) { + total_length += static_cast(s.size()); + } + + ASSERT_OK(builder_->Reserve(N * reps)); + ASSERT_OK(builder_->ReserveData(total_length * reps)); + + for (int j = 0; j < reps; ++j) { + for (int i = 0; i < N; ++i) { + if (!is_valid[i]) { + builder_->UnsafeAppendNull(); + } else if (strings[i].length() > 3) { + builder_->UnsafeAppend(strings[i].substr(0, 3)); + 
builder_->UnsafeExtendCurrent(strings[i].substr(3)); + } else { + builder_->UnsafeAppend(strings[i]); + } + } + } + ASSERT_EQ(builder_->value_data_length(), total_length * reps); + Done(); + + ASSERT_EQ(reps * N, result_->length()); + ASSERT_EQ(reps, result_->null_count()); + ASSERT_EQ(reps * 12, result_->value_data()->size()); + + CheckStringArray(*result_, strings, is_valid, reps); + } + void TestVectorAppend() { std::vector strings = {"", "bb", "a", "", "ccc"}; std::vector valid_bytes = {1, 1, 1, 0, 1}; @@ -608,6 +672,12 @@ TYPED_TEST(TestStringBuilder, TestScalarAppend) { this->TestScalarAppend(); } TYPED_TEST(TestStringBuilder, TestScalarAppendUnsafe) { this->TestScalarAppendUnsafe(); } +TYPED_TEST(TestStringBuilder, TestExtendCurrent) { this->TestExtendCurrent(); } + +TYPED_TEST(TestStringBuilder, TestExtendCurrentUnsafe) { + this->TestExtendCurrentUnsafe(); +} + TYPED_TEST(TestStringBuilder, TestVectorAppend) { this->TestVectorAppend(); } TYPED_TEST(TestStringBuilder, TestAppendCStringsWithValidBytes) { diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index a50cbcc13cf..faeeaf56333 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1036,7 +1036,7 @@ void ValidateBasicFixedSizeListArray(const FixedSizeListArray* result, ASSERT_EQ(is_valid[i] == 0, result->IsNull(i)); } - ASSERT_EQ(result->length() * result->value_length(), result->values()->length()); + ASSERT_LE(result->length() * result->value_length(), result->values()->length()); auto varr = std::dynamic_pointer_cast(result->values()); for (size_t i = 0; i < values.size(); ++i) { @@ -1084,7 +1084,7 @@ TEST_F(TestFixedSizeListArray, BulkAppend) { ValidateBasicFixedSizeListArray(result_.get(), values, is_valid); } -TEST_F(TestFixedSizeListArray, BulkAppendInvalid) { +TEST_F(TestFixedSizeListArray, BulkAppendExcess) { std::vector values = {0, 1, 2, 3, 4, 5}; std::vector is_valid = {1, 0, 1}; @@ -1099,7 +1099,8 @@ 
TEST_F(TestFixedSizeListArray, BulkAppendInvalid) { } Done(); - ASSERT_RAISES(Invalid, result_->ValidateFull()); + // We appended too many values to the child array, but that's OK + ValidateBasicFixedSizeListArray(result_.get(), values, is_valid); } TEST_F(TestFixedSizeListArray, TestZeroLength) { @@ -1131,4 +1132,16 @@ TEST_F(TestFixedSizeListArray, NegativeLength) { ASSERT_RAISES(Invalid, result_->ValidateFull()); } +TEST_F(TestFixedSizeListArray, NotEnoughValues) { + type_ = fixed_size_list(value_type_, 2); + auto values = ArrayFromJSON(value_type_, "[]"); + result_ = std::make_shared(type_, 1, values); + ASSERT_RAISES(Invalid, result_->ValidateFull()); + + // ARROW-13437: too many values is OK though + values = ArrayFromJSON(value_type_, "[1, 2, 3, 4]"); + result_ = std::make_shared(type_, 1, values); + ASSERT_OK(result_->ValidateFull()); +} + } // namespace arrow diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index f967127c5f1..102a82512e1 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -730,8 +730,6 @@ Result> SparseUnionArray::Make( return std::make_shared(std::move(internal_data)); } -std::shared_ptr UnionArray::child(int i) const { return field(i); } - std::shared_ptr UnionArray::field(int i) const { if (i < 0 || static_cast(i) >= boxed_fields_.size()) { diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index d39f33f4702..bd5abaa3a8f 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -378,6 +378,9 @@ class ARROW_EXPORT UnionArray : public Array { const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; } + /// The logical type code of the value at index. + type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; } + /// The physical child id containing value at index. 
int child_id(int64_t i) const { return union_type_->child_ids()[raw_type_codes_[i + data_->offset]]; @@ -387,12 +390,6 @@ class ARROW_EXPORT UnionArray : public Array { UnionMode::type mode() const { return union_type_->mode(); } - // Return the given field as an individual array. - // For sparse unions, the returned array has its offset, length and null - // count adjusted. - ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)") - std::shared_ptr child(int pos) const; - /// \brief Return the given field as an individual array. /// /// For sparse unions, the returned array has its offset, length and null diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index a97bf134604..5cee0a2691f 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -111,6 +111,14 @@ TEST_F(TestArray, TestLength) { ASSERT_EQ(arr->length(), 100); } +TEST_F(TestArray, TestNullToString) { + // Invalid NULL buffer + auto data = std::make_shared(nullptr, 400); + + std::unique_ptr arr(new Int32Array(100, data)); + ASSERT_EQ(arr->ToString(), ""); +} + TEST_F(TestArray, TestSliceSafe) { std::vector original_data{1, 2, 3, 4, 5, 6, 7}; auto arr = std::make_shared(7, Buffer::Wrap(original_data)); @@ -322,8 +330,6 @@ TEST_F(TestArray, BuildLargeInMemoryArray) { ASSERT_EQ(length, result->length()); } -TEST_F(TestArray, TestCopy) {} - TEST_F(TestArray, TestMakeArrayOfNull) { std::shared_ptr types[] = { // clang-format off @@ -356,6 +362,10 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), length); + for (int64_t i = 0; i < length; ++i) { + ASSERT_TRUE(array->IsNull(i)); + ASSERT_FALSE(array->IsValid(i)); + } } } } @@ -397,38 +407,68 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) { } } -TEST_F(TestArray, TestMakeArrayFromScalar) { - ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5)); - 
ASSERT_OK(null_array->ValidateFull()); - ASSERT_EQ(null_array->length(), 5); - ASSERT_EQ(null_array->null_count(), 5); +void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) { + std::unique_ptr builder; + auto null_scalar = MakeNullScalar(scalar->type); + ASSERT_OK(MakeBuilder(pool, scalar->type, &builder)); + ASSERT_OK(builder->AppendScalar(*scalar)); + ASSERT_OK(builder->AppendScalar(*scalar)); + ASSERT_OK(builder->AppendScalar(*null_scalar)); + ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); + ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2)); + ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); + + std::shared_ptr out; + FinishAndCheckPadding(builder.get(), &out); + ASSERT_OK(out->ValidateFull()); + AssertTypeEqual(scalar->type, out->type()); + ASSERT_EQ(out->length(), 9); + + const bool can_check_nulls = internal::HasValidityBitmap(out->type()->id()); + + if (can_check_nulls) { + ASSERT_EQ(out->null_count(), 4); + } + for (const auto index : {0, 1, 3, 5, 6}) { + ASSERT_FALSE(out->IsNull(index)); + ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); + AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true); + } + for (const auto index : {2, 4, 7, 8}) { + ASSERT_EQ(out->IsNull(index), can_check_nulls); + ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); + AssertScalarsEqual(*null_scalar, *scalar_i, /*verbose=*/true); + } +} +static ScalarVector GetScalars() { auto hello = Buffer::FromString("hello"); DayTimeIntervalType::DayMilliseconds daytime{1, 100}; - ScalarVector scalars{ - std::make_shared(false), - std::make_shared(3), - std::make_shared(3), - std::make_shared(3), - std::make_shared(3), - std::make_shared(3.0), - std::make_shared(10), - std::make_shared(11), + FieldVector union_fields{field("string", utf8()), field("number", int32()), + field("other_number", int32())}; + std::vector union_type_codes{5, 6, 42}; + + const auto sparse_union_ty = ::arrow::sparse_union(union_fields, 
union_type_codes); + const auto dense_union_ty = ::arrow::dense_union(union_fields, union_type_codes); + + return { + std::make_shared(false), std::make_shared(3), + std::make_shared(3), std::make_shared(3), + std::make_shared(3), std::make_shared(3.0), + std::make_shared(10), std::make_shared(11), std::make_shared(1000, time32(TimeUnit::SECOND)), std::make_shared(1111, time64(TimeUnit::MICRO)), std::make_shared(1111, timestamp(TimeUnit::MILLI)), std::make_shared(1), std::make_shared(daytime), std::make_shared(60, duration(TimeUnit::SECOND)), - std::make_shared(hello), - std::make_shared(hello), + std::make_shared(hello), std::make_shared(hello), std::make_shared( hello, fixed_size_binary(static_cast(hello->size()))), std::make_shared(Decimal128(10), decimal(16, 4)), std::make_shared(Decimal256(10), decimal(76, 38)), - std::make_shared(hello), - std::make_shared(hello), + std::make_shared(hello), std::make_shared(hello), std::make_shared(ArrayFromJSON(int8(), "[1, 2, 3]")), std::make_shared(ArrayFromJSON(int8(), "[1, 1, 2, 2, 3, 3]")), std::make_shared(ArrayFromJSON(int8(), "[1, 2, 3, 4]")), @@ -437,7 +477,25 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { std::make_shared(2), std::make_shared(6), }, - struct_({field("min", int32()), field("max", int32())}))}; + struct_({field("min", int32()), field("max", int32())})), + // Same values, different union type codes + std::make_shared(std::make_shared(100), 6, + sparse_union_ty), + std::make_shared(std::make_shared(100), 42, + sparse_union_ty), + std::make_shared(std::make_shared(101), 6, + dense_union_ty), + std::make_shared(std::make_shared(101), 42, + dense_union_ty)}; +} + +TEST_F(TestArray, TestMakeArrayFromScalar) { + ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5)); + ASSERT_OK(null_array->ValidateFull()); + ASSERT_EQ(null_array->length(), 5); + ASSERT_EQ(null_array->null_count(), 5); + + auto scalars = GetScalars(); for (int64_t length : {16}) { for (auto scalar : scalars) { @@ 
-445,8 +503,32 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), 0); + + // test case for ARROW-13321 + for (int64_t i : std::vector{0, length / 2, length - 1}) { + ASSERT_OK_AND_ASSIGN(auto s, array->GetScalar(i)); + AssertScalarsEqual(*s, *scalar, /*verbose=*/true); + } } } + + for (auto scalar : scalars) { + AssertAppendScalar(pool_, scalar); + } +} + +TEST_F(TestArray, TestMakeArrayFromScalarSliced) { + // Regression test for ARROW-13437 + auto scalars = GetScalars(); + + for (auto scalar : scalars) { + SCOPED_TRACE(scalar->type->ToString()); + ASSERT_OK_AND_ASSIGN(auto array, MakeArrayFromScalar(*scalar, 32)); + auto sliced = array->Slice(1, 4); + ASSERT_EQ(sliced->length(), 4); + ASSERT_EQ(sliced->null_count(), 0); + ARROW_EXPECT_OK(sliced->ValidateFull()); + } } TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) { @@ -481,6 +563,8 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) { ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i)); ASSERT_TRUE(item->Equals(scalar)); } + + AssertAppendScalar(pool_, std::make_shared(scalar)); } TEST_F(TestArray, ValidateBuffersPrimitive) { diff --git a/cpp/src/arrow/array/array_union_test.cc b/cpp/src/arrow/array/array_union_test.cc index 88d25e823bb..d3afe40df8d 100644 --- a/cpp/src/arrow/array/array_union_test.cc +++ b/cpp/src/arrow/array/array_union_test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-#include - #include +#include + #include "arrow/array.h" #include "arrow/array/builder_nested.h" #include "arrow/array/builder_union.h" @@ -107,11 +107,11 @@ class TestUnionArrayFactories : public ::testing::Test { public: void SetUp() { pool_ = default_memory_pool(); - type_codes_ = {1, 2, 4, 8}; + type_codes_ = {1, 2, 4, 127}; ArrayFromVector({0, 1, 2, 0, 1, 3, 2, 0, 2, 1}, &type_ids_); - ArrayFromVector({1, 2, 4, 1, 2, 8, 4, 1, 4, 2}, &logical_type_ids_); - ArrayFromVector({1, 2, 4, 1, -2, 8, 4, 1, 4, 2}, &invalid_type_ids1_); - ArrayFromVector({1, 2, 4, 1, 3, 8, 4, 1, 4, 2}, &invalid_type_ids2_); + ArrayFromVector({1, 2, 4, 1, 2, 127, 4, 1, 4, 2}, &logical_type_ids_); + ArrayFromVector({1, 2, 4, 1, -2, 127, 4, 1, 4, 2}, &invalid_type_ids1_); + ArrayFromVector({1, 2, 4, 1, 3, 127, 4, 1, 4, 2}, &invalid_type_ids2_); } void CheckUnionArray(const UnionArray& array, UnionMode::type mode, diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc index b92cc285894..2f4e63b546d 100644 --- a/cpp/src/arrow/array/builder_base.cc +++ b/cpp/src/arrow/array/builder_base.cc @@ -18,14 +18,18 @@ #include "arrow/array/builder_base.h" #include +#include #include #include "arrow/array/array_base.h" #include "arrow/array/data.h" #include "arrow/array/util.h" #include "arrow/buffer.h" +#include "arrow/builder.h" +#include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/util/logging.h" +#include "arrow/visitor_inline.h" namespace arrow { @@ -92,6 +96,210 @@ Status ArrayBuilder::Advance(int64_t elements) { return null_bitmap_builder_.Advance(elements); } +namespace { + +struct AppendScalarImpl { + template + enable_if_t::value || is_decimal_type::value || + is_fixed_size_binary_type::value, + Status> + Visit(const T&) { + auto builder = internal::checked_cast::BuilderType*>(builder_); + RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); + + for (int64_t i = 0; i < n_repeats_; i++) { + for (const std::shared_ptr* raw 
= scalars_begin_; raw != scalars_end_; + raw++) { + auto scalar = + internal::checked_cast::ScalarType*>(raw->get()); + if (scalar->is_valid) { + builder->UnsafeAppend(scalar->value); + } else { + builder->UnsafeAppendNull(); + } + } + } + return Status::OK(); + } + + template + enable_if_base_binary Visit(const T&) { + int64_t data_size = 0; + for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; + raw++) { + auto scalar = + internal::checked_cast::ScalarType*>(raw->get()); + if (scalar->is_valid) { + data_size += scalar->value->size(); + } + } + + auto builder = internal::checked_cast::BuilderType*>(builder_); + RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); + RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size)); + + for (int64_t i = 0; i < n_repeats_; i++) { + for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; + raw++) { + auto scalar = + internal::checked_cast::ScalarType*>(raw->get()); + if (scalar->is_valid) { + builder->UnsafeAppend(util::string_view{*scalar->value}); + } else { + builder->UnsafeAppendNull(); + } + } + } + return Status::OK(); + } + + template + enable_if_list_like Visit(const T&) { + auto builder = internal::checked_cast::BuilderType*>(builder_); + int64_t num_children = 0; + for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_; + scalar++) { + if (!(*scalar)->is_valid) continue; + num_children += + internal::checked_cast(**scalar).value->length(); + } + RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_)); + + for (int64_t i = 0; i < n_repeats_; i++) { + for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_; + scalar++) { + if ((*scalar)->is_valid) { + RETURN_NOT_OK(builder->Append()); + const Array& list = + *internal::checked_cast(**scalar).value; + for (int64_t i = 0; i < list.length(); i++) { + ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i)); + 
RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar)); + } + } else { + RETURN_NOT_OK(builder_->AppendNull()); + } + } + } + return Status::OK(); + } + + Status Visit(const StructType& type) { + auto* builder = internal::checked_cast(builder_); + auto count = n_repeats_ * (scalars_end_ - scalars_begin_); + RETURN_NOT_OK(builder->Reserve(count)); + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count)); + } + for (int64_t i = 0; i < n_repeats_; i++) { + for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { + const auto& scalar = internal::checked_cast(**s); + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + if (!scalar.is_valid || !scalar.value[field_index]) { + RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull()); + } else { + RETURN_NOT_OK(builder->field_builder(field_index) + ->AppendScalar(*scalar.value[field_index])); + } + } + RETURN_NOT_OK(builder->Append(scalar.is_valid)); + } + } + return Status::OK(); + } + + Status Visit(const SparseUnionType& type) { return MakeUnionArray(type); } + + Status Visit(const DenseUnionType& type) { return MakeUnionArray(type); } + + template + Status MakeUnionArray(const T& type) { + using BuilderType = typename TypeTraits::BuilderType; + constexpr bool is_dense = std::is_same::value; + + auto* builder = internal::checked_cast(builder_); + const auto count = n_repeats_ * (scalars_end_ - scalars_begin_); + + RETURN_NOT_OK(builder->Reserve(count)); + + DCHECK_EQ(type.num_fields(), builder->num_children()); + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + RETURN_NOT_OK(builder->child_builder(field_index)->Reserve(count)); + } + + for (int64_t i = 0; i < n_repeats_; i++) { + for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { + // For each scalar, + // 1. append the type code, + // 2. 
append the value to the corresponding child, + // 3. if the union is sparse, append null to the other children. + const auto& scalar = internal::checked_cast(**s); + const auto scalar_field_index = type.child_ids()[scalar.type_code]; + RETURN_NOT_OK(builder->Append(scalar.type_code)); + + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + auto* child_builder = builder->child_builder(field_index).get(); + if (field_index == scalar_field_index) { + if (scalar.is_valid) { + RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value)); + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } else if (!is_dense) { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } + } + } + return Status::OK(); + } + + Status Visit(const DataType& type) { + return Status::NotImplemented("AppendScalar for type ", type); + } + + Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); } + + const std::shared_ptr* scalars_begin_; + const std::shared_ptr* scalars_end_; + int64_t n_repeats_; + ArrayBuilder* builder_; +}; + +} // namespace + +Status ArrayBuilder::AppendScalar(const Scalar& scalar) { + if (!scalar.type->Equals(type())) { + return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(), + " to builder for type ", type()->ToString()); + } + std::shared_ptr shared{const_cast(&scalar), [](Scalar*) {}}; + return AppendScalarImpl{&shared, &shared + 1, /*n_repeats=*/1, this}.Convert(); +} + +Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) { + if (!scalar.type->Equals(type())) { + return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(), + " to builder for type ", type()->ToString()); + } + std::shared_ptr shared{const_cast(&scalar), [](Scalar*) {}}; + return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert(); +} + +Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) { + if (scalars.empty()) return Status::OK(); + const auto ty = type(); + 
for (const auto& scalar : scalars) { + if (!scalar->type->Equals(ty)) { + return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(), + " to builder for type ", type()->ToString()); + } + } + return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(), + /*n_repeats=*/1, this} + .Convert(); +} + Status ArrayBuilder::Finish(std::shared_ptr* out) { std::shared_ptr internal_data; RETURN_NOT_OK(FinishInternal(&internal_data)); diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h index 15c726241b5..c2aba4e959f 100644 --- a/cpp/src/arrow/array/builder_base.h +++ b/cpp/src/arrow/array/builder_base.h @@ -50,6 +50,8 @@ class ARROW_EXPORT ArrayBuilder { public: explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {} + ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder); + virtual ~ArrayBuilder() = default; /// For nested types. Since the objects are owned by this class instance, we @@ -116,6 +118,11 @@ class ARROW_EXPORT ArrayBuilder { /// This method is useful when appending null values to a parent nested type. virtual Status AppendEmptyValues(int64_t length) = 0; + /// \brief Append a value from a scalar + Status AppendScalar(const Scalar& scalar); + Status AppendScalar(const Scalar& scalar, int64_t n_repeats); + Status AppendScalars(const ScalarVector& scalars); + /// For cases where raw data was memcpy'd into the internal buffers, allows us /// to advance the length of the builder. It is your responsibility to use /// this function responsibly. 
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index bc49c7d6787..7653eeca5c4 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -77,6 +77,23 @@ class BaseBinaryBuilder : public ArrayBuilder { return Append(value.data(), static_cast(value.size())); } + /// Extend the last appended value by appending more data at the end + /// + /// Unlike Append, this does not create a new offset. + Status ExtendCurrent(const uint8_t* value, offset_type length) { + // Safety check for UBSAN. + if (ARROW_PREDICT_TRUE(length > 0)) { + ARROW_RETURN_NOT_OK(ValidateOverflow(length)); + ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length)); + } + return Status::OK(); + } + + Status ExtendCurrent(util::string_view value) { + return ExtendCurrent(reinterpret_cast(value.data()), + static_cast(value.size())); + } + Status AppendNulls(int64_t length) final { const int64_t num_bytes = value_data_builder_.length(); ARROW_RETURN_NOT_OK(Reserve(length)); @@ -133,12 +150,28 @@ class BaseBinaryBuilder : public ArrayBuilder { UnsafeAppend(value.data(), static_cast(value.size())); } + /// Like ExtendCurrent, but do not check capacity + void UnsafeExtendCurrent(const uint8_t* value, offset_type length) { + value_data_builder_.UnsafeAppend(value, length); + } + + void UnsafeExtendCurrent(util::string_view value) { + UnsafeExtendCurrent(reinterpret_cast(value.data()), + static_cast(value.size())); + } + void UnsafeAppendNull() { const int64_t num_bytes = value_data_builder_.length(); offsets_builder_.UnsafeAppend(static_cast(num_bytes)); UnsafeAppendToBitmap(false); } + void UnsafeAppendEmptyValue() { + const int64_t num_bytes = value_data_builder_.length(); + offsets_builder_.UnsafeAppend(static_cast(num_bytes)); + UnsafeAppendToBitmap(true); + } + /// \brief Append a sequence of strings in one shot. 
/// /// \param[in] values a vector of strings @@ -258,14 +291,7 @@ class BaseBinaryBuilder : public ArrayBuilder { } Status Resize(int64_t capacity) override { - // XXX Why is this check necessary? There is no reason to disallow, say, - // binary arrays with more than 2**31 empty or null values. - if (capacity > memory_limit()) { - return Status::CapacityError("BinaryBuilder cannot reserve space for more than ", - memory_limit(), " child elements, got ", capacity); - } ARROW_RETURN_NOT_OK(CheckCapacity(capacity)); - // One more than requested for offsets ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1)); return ArrayBuilder::Resize(capacity); @@ -441,6 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { return Status::OK(); } + Status Append(const Buffer& s) { + ARROW_RETURN_NOT_OK(Reserve(1)); + UnsafeAppend(util::string_view(s)); + return Status::OK(); + } + + Status Append(const std::shared_ptr& s) { return Append(*s); } + template Status Append(const std::array& value) { ARROW_RETURN_NOT_OK(Reserve(1)); @@ -476,6 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { UnsafeAppend(reinterpret_cast(value.data())); } + void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); } + + void UnsafeAppend(const std::shared_ptr& s) { UnsafeAppend(*s); } + void UnsafeAppendNull() { UnsafeAppendToBitmap(false); byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0); diff --git a/cpp/src/arrow/array/builder_decimal.h b/cpp/src/arrow/array/builder_decimal.h index 8c75e7dd674..f48392ed001 100644 --- a/cpp/src/arrow/array/builder_decimal.h +++ b/cpp/src/arrow/array/builder_decimal.h @@ -32,6 +32,7 @@ namespace arrow { class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder { public: using TypeClass = Decimal128Type; + using ValueType = Decimal128; explicit Decimal128Builder(const std::shared_ptr& type, MemoryPool* pool = default_memory_pool()); @@ -61,6 +62,7 @@ class ARROW_EXPORT 
Decimal128Builder : public FixedSizeBinaryBuilder { class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder { public: using TypeClass = Decimal256Type; + using ValueType = Decimal256; explicit Decimal256Builder(const std::shared_ptr& type, MemoryPool* pool = default_memory_pool()); diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index 40d6ce1ba9a..455cb3df7b1 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -29,6 +29,7 @@ #include "arrow/array/builder_primitive.h" // IWYU pragma: export #include "arrow/array/data.h" #include "arrow/array/util.h" +#include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" diff --git a/cpp/src/arrow/array/builder_primitive.cc b/cpp/src/arrow/array/builder_primitive.cc index 037a1ecbf91..e403c42411d 100644 --- a/cpp/src/arrow/array/builder_primitive.cc +++ b/cpp/src/arrow/array/builder_primitive.cc @@ -65,9 +65,8 @@ Status BooleanBuilder::Resize(int64_t capacity) { } Status BooleanBuilder::FinishInternal(std::shared_ptr* out) { - std::shared_ptr null_bitmap, data; - RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); - RETURN_NOT_OK(data_builder_.Finish(&data)); + ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_)); + ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_)); *out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_); diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h index e10f11fdd6c..e0f39f97967 100644 --- a/cpp/src/arrow/array/builder_primitive.h +++ b/cpp/src/arrow/array/builder_primitive.h @@ -23,6 +23,7 @@ #include "arrow/array/builder_base.h" #include "arrow/array/data.h" +#include "arrow/result.h" #include "arrow/type.h" #include "arrow/type_traits.h" @@ -185,9 +186,9 @@ class NumericBuilder : public ArrayBuilder { } Status 
FinishInternal(std::shared_ptr* out) override { - std::shared_ptr data, null_bitmap; - ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); - ARROW_RETURN_NOT_OK(data_builder_.Finish(&data)); + ARROW_ASSIGN_OR_RAISE(auto null_bitmap, + null_bitmap_builder_.FinishWithLength(length_)); + ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_)); *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_); capacity_ = length_ = null_count_ = 0; return Status::OK(); diff --git a/cpp/src/arrow/array/builder_union.cc b/cpp/src/arrow/array/builder_union.cc index 90d4f42084a..8617cb73fce 100644 --- a/cpp/src/arrow/array/builder_union.cc +++ b/cpp/src/arrow/array/builder_union.cc @@ -65,8 +65,8 @@ BasicUnionBuilder::BasicUnionBuilder( children_ = children; type_id_to_children_.resize(union_type.max_type_code() + 1, nullptr); - DCHECK_LT( - type_id_to_children_.size(), + DCHECK_LE( + type_id_to_children_.size() - 1, static_cast(UnionType::kMaxTypeCode)); for (size_t i = 0; i < children.size(); ++i) { diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index 32478783394..e2a5898c209 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -482,9 +482,4 @@ Result> Concatenate(const ArrayVector& arrays, MemoryPool return MakeArray(std::move(out_data)); } -Status Concatenate(const ArrayVector& arrays, MemoryPool* pool, - std::shared_ptr* out) { - return Concatenate(arrays, pool).Value(out); -} - } // namespace arrow diff --git a/cpp/src/arrow/array/concatenate.h b/cpp/src/arrow/array/concatenate.h index a6c1c3cf3c1..e7597aad812 100644 --- a/cpp/src/arrow/array/concatenate.h +++ b/cpp/src/arrow/array/concatenate.h @@ -34,9 +34,4 @@ ARROW_EXPORT Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool = default_memory_pool()); -ARROW_DEPRECATED("Use Result-returning version") -ARROW_EXPORT -Status Concatenate(const ArrayVector& arrays, MemoryPool* pool, - 
std::shared_ptr* out); - } // namespace arrow diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index e397a752cd8..5a214473972 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -56,41 +56,39 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length, } } -std::shared_ptr ArrayData::Make(const std::shared_ptr& type, - int64_t length, +std::shared_ptr ArrayData::Make(std::shared_ptr type, int64_t length, std::vector> buffers, int64_t null_count, int64_t offset) { AdjustNonNullable(type->id(), length, &buffers, &null_count); - return std::make_shared(type, length, std::move(buffers), null_count, - offset); + return std::make_shared(std::move(type), length, std::move(buffers), + null_count, offset); } std::shared_ptr ArrayData::Make( - const std::shared_ptr& type, int64_t length, + std::shared_ptr type, int64_t length, std::vector> buffers, std::vector> child_data, int64_t null_count, int64_t offset) { AdjustNonNullable(type->id(), length, &buffers, &null_count); - return std::make_shared(type, length, std::move(buffers), + return std::make_shared(std::move(type), length, std::move(buffers), std::move(child_data), null_count, offset); } std::shared_ptr ArrayData::Make( - const std::shared_ptr& type, int64_t length, + std::shared_ptr type, int64_t length, std::vector> buffers, std::vector> child_data, std::shared_ptr dictionary, int64_t null_count, int64_t offset) { AdjustNonNullable(type->id(), length, &buffers, &null_count); - auto data = std::make_shared(type, length, std::move(buffers), + auto data = std::make_shared(std::move(type), length, std::move(buffers), std::move(child_data), null_count, offset); data->dictionary = std::move(dictionary); return data; } -std::shared_ptr ArrayData::Make(const std::shared_ptr& type, - int64_t length, int64_t null_count, - int64_t offset) { - return std::make_shared(type, length, null_count, offset); +std::shared_ptr ArrayData::Make(std::shared_ptr type, int64_t 
length, + int64_t null_count, int64_t offset) { + return std::make_shared(std::move(type), length, null_count, offset); } std::shared_ptr ArrayData::Slice(int64_t off, int64_t len) const { diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 02a49949e1f..418d09def6b 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -71,49 +71,47 @@ constexpr int64_t kUnknownNullCount = -1; /// input array and replace them with newly-allocated data, changing the output /// data type as well. struct ARROW_EXPORT ArrayData { - ArrayData() : length(0), null_count(0), offset(0) {} + ArrayData() = default; - ArrayData(const std::shared_ptr& type, int64_t length, + ArrayData(std::shared_ptr type, int64_t length, int64_t null_count = kUnknownNullCount, int64_t offset = 0) - : type(type), length(length), null_count(null_count), offset(offset) {} + : type(std::move(type)), length(length), null_count(null_count), offset(offset) {} - ArrayData(const std::shared_ptr& type, int64_t length, + ArrayData(std::shared_ptr type, int64_t length, std::vector> buffers, int64_t null_count = kUnknownNullCount, int64_t offset = 0) - : ArrayData(type, length, null_count, offset) { + : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); } - ArrayData(const std::shared_ptr& type, int64_t length, + ArrayData(std::shared_ptr type, int64_t length, std::vector> buffers, std::vector> child_data, int64_t null_count = kUnknownNullCount, int64_t offset = 0) - : ArrayData(type, length, null_count, offset) { + : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); this->child_data = std::move(child_data); } - static std::shared_ptr Make(const std::shared_ptr& type, - int64_t length, + static std::shared_ptr Make(std::shared_ptr type, int64_t length, std::vector> buffers, int64_t null_count = kUnknownNullCount, int64_t offset = 0); static std::shared_ptr Make( - const std::shared_ptr& type, 
int64_t length, + std::shared_ptr type, int64_t length, std::vector> buffers, std::vector> child_data, int64_t null_count = kUnknownNullCount, int64_t offset = 0); static std::shared_ptr Make( - const std::shared_ptr& type, int64_t length, + std::shared_ptr type, int64_t length, std::vector> buffers, std::vector> child_data, std::shared_ptr dictionary, int64_t null_count = kUnknownNullCount, int64_t offset = 0); - static std::shared_ptr Make(const std::shared_ptr& type, - int64_t length, + static std::shared_ptr Make(std::shared_ptr type, int64_t length, int64_t null_count = kUnknownNullCount, int64_t offset = 0); @@ -232,11 +230,11 @@ struct ARROW_EXPORT ArrayData { } std::shared_ptr type; - int64_t length; - mutable std::atomic null_count; + int64_t length = 0; + mutable std::atomic null_count{0}; // The logical start point into the physical buffers (in values, not bytes). // Note that, for child data, this must be *added* to the child data's own offset. - int64_t offset; + int64_t offset = 0; std::vector> buffers; std::vector> child_data; diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index 297745a2b17..fae379e51f4 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -34,6 +34,7 @@ #include "arrow/buffer.h" #include "arrow/buffer_builder.h" #include "arrow/extension_type.h" +#include "arrow/result.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" @@ -286,7 +287,7 @@ std::shared_ptr MakeArray(const std::shared_ptr& data) { // ---------------------------------------------------------------------- // Misc APIs -namespace internal { +namespace { // get the maximum buffer length required, then allocate a single zeroed buffer // to use anywhere a buffer is required @@ -510,16 +511,26 @@ class RepeatedArrayFactory { } template - enable_if_t::value || is_fixed_size_binary_type::value || - is_temporal_type::value, - Status> - Visit(const T&) { + enable_if_t::value || is_temporal_type::value, 
Status> Visit( + const T&) { auto value = checked_cast::ScalarType&>(scalar_).value; return FinishFixedWidth(&value, sizeof(value)); } - Status Visit(const Decimal128Type&) { - auto value = checked_cast(scalar_).value.ToBytes(); + Status Visit(const FixedSizeBinaryType& type) { + auto value = checked_cast(scalar_).value; + return FinishFixedWidth(value->data(), type.byte_width()); + } + + template + enable_if_decimal Visit(const T&) { + using ScalarType = typename TypeTraits::ScalarType; + auto value = checked_cast(scalar_).value.ToBytes(); + return FinishFixedWidth(value.data(), value.size()); + } + + Status Visit(const Decimal256Type&) { + auto value = checked_cast(scalar_).value.ToBytes(); return FinishFixedWidth(value.data(), value.size()); } @@ -603,18 +614,85 @@ class RepeatedArrayFactory { return Status::OK(); } - Status Visit(const ExtensionType& type) { - return Status::NotImplemented("construction from scalar of type ", *scalar_.type); + Status Visit(const SparseUnionType& type) { + const auto& union_scalar = checked_cast(scalar_); + const auto& union_type = checked_cast(*scalar_.type); + const auto scalar_type_code = union_scalar.type_code; + const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; + + // Create child arrays: most of them are all-null, except for the child array + // for the given type code (if the scalar is valid). 
+ ArrayVector fields; + for (int i = 0; i < type.num_fields(); ++i) { + fields.emplace_back(); + if (i == scalar_child_id && scalar_.is_valid) { + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayFromScalar(*union_scalar.value, length_, pool_)); + } else { + ARROW_ASSIGN_OR_RAISE( + fields.back(), MakeArrayOfNull(union_type.field(i)->type(), length_, pool_)); + } + } + + ARROW_ASSIGN_OR_RAISE(auto type_codes_buffer, CreateUnionTypeCodes(scalar_type_code)); + + out_ = std::make_shared(scalar_.type, length_, std::move(fields), + std::move(type_codes_buffer)); + return Status::OK(); } Status Visit(const DenseUnionType& type) { - return Status::NotImplemented("construction from scalar of type ", *scalar_.type); + const auto& union_scalar = checked_cast(scalar_); + const auto& union_type = checked_cast(*scalar_.type); + const auto scalar_type_code = union_scalar.type_code; + const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; + + // Create child arrays: all of them are empty, except for the child array + // for the given type code (if length > 0). 
+ ArrayVector fields; + for (int i = 0; i < type.num_fields(); ++i) { + fields.emplace_back(); + if (i == scalar_child_id && length_ > 0) { + if (scalar_.is_valid) { + // One valid element (will be referenced by multiple offsets) + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayFromScalar(*union_scalar.value, 1, pool_)); + } else { + // One null element (will be referenced by multiple offsets) + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayOfNull(union_type.field(i)->type(), 1, pool_)); + } + } else { + // Zero element (will not be referenced by any offset) + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayOfNull(union_type.field(i)->type(), 0, pool_)); + } + } + + // Create an offsets buffer with all offsets equal to 0 + ARROW_ASSIGN_OR_RAISE(auto offsets_buffer, + AllocateBuffer(length_ * sizeof(int32_t), pool_)); + memset(offsets_buffer->mutable_data(), 0, offsets_buffer->size()); + + ARROW_ASSIGN_OR_RAISE(auto type_codes_buffer, CreateUnionTypeCodes(scalar_type_code)); + + out_ = std::make_shared(scalar_.type, length_, std::move(fields), + std::move(type_codes_buffer), + std::move(offsets_buffer)); + return Status::OK(); } - Status Visit(const SparseUnionType& type) { + Status Visit(const ExtensionType& type) { return Status::NotImplemented("construction from scalar of type ", *scalar_.type); } + Result> CreateUnionTypeCodes(int8_t type_code) { + TypedBufferBuilder builder(pool_); + RETURN_NOT_OK(builder.Resize(length_)); + builder.UnsafeAppend(length_, type_code); + return builder.Finish(); + } + template Status CreateOffsetsBuffer(OffsetType value_length, std::shared_ptr* out) { TypedBufferBuilder builder(pool_); @@ -650,12 +728,11 @@ class RepeatedArrayFactory { std::shared_ptr out_; }; -} // namespace internal +} // namespace Result> MakeArrayOfNull(const std::shared_ptr& type, int64_t length, MemoryPool* pool) { - ARROW_ASSIGN_OR_RAISE(auto data, - internal::NullArrayFactory(pool, type, length).Create()); + ARROW_ASSIGN_OR_RAISE(auto data, 
NullArrayFactory(pool, type, length).Create()); return MakeArray(data); } @@ -664,7 +741,7 @@ Result> MakeArrayFromScalar(const Scalar& scalar, int64_t if (!scalar.is_valid) { return MakeArrayOfNull(scalar.type, length, pool); } - return internal::RepeatedArrayFactory(pool, scalar, length).Create(); + return RepeatedArrayFactory(pool, scalar, length).Create(); } namespace internal { diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 6ac885f8443..0ffba4a5071 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -85,9 +85,9 @@ struct ValidateArrayImpl { int64_t expected_values_length = -1; if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) || - values.length != expected_values_length) { + values.length < expected_values_length) { return Status::Invalid("Values length (", values.length, - ") is not equal to the length (", data.length, + ") is less than the length (", data.length, ") multiplied by the value size (", list_size, ")"); } @@ -555,7 +555,7 @@ struct ValidateArrayFullImpl { const ArrayData& field = *data.child_data[i]; const Status field_valid = ValidateArrayFull(field); if (!field_valid.ok()) { - return Status::Invalid("Struct child array #", i, + return Status::Invalid("Union child array #", i, " invalid: ", field_valid.ToString()); } } diff --git a/cpp/src/arrow/arrow.pc.in b/cpp/src/arrow/arrow.pc.in index 947d534fdbf..ef995fdc3db 100644 --- a/cpp/src/arrow/arrow.pc.in +++ b/cpp/src/arrow/arrow.pc.in @@ -25,5 +25,7 @@ full_so_version=@ARROW_FULL_SO_VERSION@ Name: Apache Arrow Description: Arrow is a set of technologies that enable big-data systems to process and move data fast. 
Version: @ARROW_VERSION@ +Requires.private:@ARROW_PC_REQUIRES_PRIVATE@ Libs: -L${libdir} -larrow +Libs.private:@ARROW_PC_LIBS_PRIVATE@ Cflags: -I${includedir} diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 9215d9ab544..b1b2945d0f5 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -21,7 +21,6 @@ #include #include -#include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/bit_util.h" @@ -171,111 +170,6 @@ MutableBuffer::MutableBuffer(const std::shared_ptr& parent, const int64_ parent_ = parent; } -// ----------------------------------------------------------------------- -// Pool buffer and allocation - -/// A Buffer whose lifetime is tied to a particular MemoryPool -class PoolBuffer : public ResizableBuffer { - public: - explicit PoolBuffer(std::shared_ptr mm, MemoryPool* pool) - : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {} - - ~PoolBuffer() override { - if (mutable_data_ != nullptr) { - pool_->Free(mutable_data_, capacity_); - } - } - - Status Reserve(const int64_t capacity) override { - if (capacity < 0) { - return Status::Invalid("Negative buffer capacity: ", capacity); - } - if (!mutable_data_ || capacity > capacity_) { - uint8_t* new_data; - int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity); - if (mutable_data_) { - RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); - } else { - RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data)); - mutable_data_ = new_data; - } - data_ = mutable_data_; - capacity_ = new_capacity; - } - return Status::OK(); - } - - Status Resize(const int64_t new_size, bool shrink_to_fit = true) override { - if (ARROW_PREDICT_FALSE(new_size < 0)) { - return Status::Invalid("Negative buffer resize: ", new_size); - } - if (mutable_data_ && shrink_to_fit && new_size <= size_) { - // Buffer is non-null and is not growing, so shrink to the requested size without - // excess space. 
- int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size); - if (capacity_ != new_capacity) { - // Buffer hasn't got yet the requested size. - RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); - data_ = mutable_data_; - capacity_ = new_capacity; - } - } else { - RETURN_NOT_OK(Reserve(new_size)); - } - size_ = new_size; - - return Status::OK(); - } - - static std::shared_ptr MakeShared(MemoryPool* pool) { - std::shared_ptr mm; - if (pool == nullptr) { - pool = default_memory_pool(); - mm = default_cpu_memory_manager(); - } else { - mm = CPUDevice::memory_manager(pool); - } - return std::make_shared(std::move(mm), pool); - } - - static std::unique_ptr MakeUnique(MemoryPool* pool) { - std::shared_ptr mm; - if (pool == nullptr) { - pool = default_memory_pool(); - mm = default_cpu_memory_manager(); - } else { - mm = CPUDevice::memory_manager(pool); - } - return std::unique_ptr(new PoolBuffer(std::move(mm), pool)); - } - - private: - MemoryPool* pool_; -}; - -namespace { -// A utility that does most of the work of the `AllocateBuffer` and -// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to -// a PoolBuffer. 
-template -inline Result ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) { - RETURN_NOT_OK(buffer->Resize(size)); - buffer->ZeroPadding(); - return std::move(buffer); -} - -} // namespace - -Result> AllocateBuffer(const int64_t size, MemoryPool* pool) { - return ResizePoolBuffer>(PoolBuffer::MakeUnique(pool), size); -} - -Result> AllocateResizableBuffer(const int64_t size, - MemoryPool* pool) { - return ResizePoolBuffer>(PoolBuffer::MakeUnique(pool), - size); -} - Result> AllocateBitmap(int64_t length, MemoryPool* pool) { ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool)); // Zero out any trailing bits diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 1a3bb29e439..cfd525ab2d6 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -56,23 +56,13 @@ class ARROW_EXPORT Buffer { /// /// \note The passed memory must be kept alive through some other means Buffer(const uint8_t* data, int64_t size) - : is_mutable_(false), - is_cpu_(true), - data_(data), - mutable_data_(NULLPTR), - size_(size), - capacity_(size) { + : is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) { SetMemoryManager(default_cpu_memory_manager()); } Buffer(const uint8_t* data, int64_t size, std::shared_ptr mm, std::shared_ptr parent = NULLPTR) - : is_mutable_(false), - data_(data), - mutable_data_(NULLPTR), - size_(size), - capacity_(size), - parent_(parent) { + : is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) { SetMemoryManager(std::move(mm)); } @@ -131,7 +121,7 @@ class ARROW_EXPORT Buffer { #endif // A zero-capacity buffer can have a null data pointer if (capacity_ != 0) { - memset(mutable_data_ + size_, 0, static_cast(capacity_ - size_)); + memset(mutable_data() + size_, 0, static_cast(capacity_ - size_)); } } @@ -205,7 +195,8 @@ class ARROW_EXPORT Buffer { CheckCPU(); CheckMutable(); #endif - return ARROW_PREDICT_TRUE(is_cpu_) ? 
mutable_data_ : NULLPTR; + return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast(data_) + : NULLPTR; } /// \brief Return the device address of the buffer's data @@ -219,7 +210,7 @@ class ARROW_EXPORT Buffer { #ifndef NDEBUG CheckMutable(); #endif - return reinterpret_cast(mutable_data_); + return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast(data_) : 0; } /// \brief Return the buffer's size in bytes @@ -289,7 +280,6 @@ class ARROW_EXPORT Buffer { bool is_mutable_; bool is_cpu_; const uint8_t* data_; - uint8_t* mutable_data_; int64_t size_; int64_t capacity_; @@ -389,13 +379,11 @@ Result> SliceMutableBufferSafe( class ARROW_EXPORT MutableBuffer : public Buffer { public: MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) { - mutable_data_ = data; is_mutable_ = true; } MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr mm) : Buffer(data, size, std::move(mm)) { - mutable_data_ = data; is_mutable_ = true; } @@ -428,7 +416,10 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { /// /// @param new_size The new size for the buffer. /// @param shrink_to_fit Whether to shrink the capacity if new size < current size - virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0; + virtual Status Resize(const int64_t new_size, bool shrink_to_fit) = 0; + Status Resize(const int64_t new_size) { + return Resize(new_size, /*shrink_to_fit=*/true); + } /// Ensure that buffer has enough memory allocated to fit the indicated /// capacity (and meets the 64 byte padding requirement in Layout.md). 
diff --git a/cpp/src/arrow/buffer_builder.h b/cpp/src/arrow/buffer_builder.h index f525ec23c58..eb3f68affc0 100644 --- a/cpp/src/arrow/buffer_builder.h +++ b/cpp/src/arrow/buffer_builder.h @@ -45,8 +45,7 @@ class ARROW_EXPORT BufferBuilder { explicit BufferBuilder(MemoryPool* pool = default_memory_pool()) : pool_(pool), data_(/*ensure never null to make ubsan happy and avoid check penalties below*/ - &util::internal::non_null_filler), - + util::MakeNonNull()), capacity_(0), size_(0) {} @@ -64,15 +63,12 @@ class ARROW_EXPORT BufferBuilder { /// \brief Resize the buffer to the nearest multiple of 64 bytes /// /// \param new_capacity the new capacity of the of the builder. Will be - /// rounded up to a multiple of 64 bytes for padding \param shrink_to_fit if - /// new capacity is smaller than the existing size, reallocate internal - /// buffer. Set to false to avoid reallocations when shrinking the builder. + /// rounded up to a multiple of 64 bytes for padding + /// \param shrink_to_fit if new capacity is smaller than the existing, + /// reallocate internal buffer. Set to false to avoid reallocations when + /// shrinking the builder. /// \return Status Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { - // Resize(0) is a no-op - if (new_capacity == 0) { - return Status::OK(); - } if (buffer_ == NULLPTR) { ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_)); } else { @@ -168,6 +164,17 @@ class ARROW_EXPORT BufferBuilder { return out; } + /// \brief Like Finish, but override the final buffer size + /// + /// This is useful after writing data directly into the builder memory + /// without calling the Append methods (basically, when using BufferBuilder + /// mostly for memory allocation). 
+ Result> FinishWithLength(int64_t final_length, + bool shrink_to_fit = true) { + size_ = final_length; + return Finish(shrink_to_fit); + } + void Reset() { buffer_ = NULLPTR; capacity_ = size_ = 0; @@ -273,6 +280,16 @@ class TypedBufferBuilder< return out; } + /// \brief Like Finish, but override the final buffer size + /// + /// This is useful after writing data directly into the builder memory + /// without calling the Append methods (basically, when using TypedBufferBuilder + /// only for memory allocation). + Result> FinishWithLength(int64_t final_length, + bool shrink_to_fit = true) { + return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit); + } + void Reset() { bytes_builder_.Reset(); } int64_t length() const { return bytes_builder_.length() / sizeof(T); } @@ -399,6 +416,19 @@ class TypedBufferBuilder { return out; } + /// \brief Like Finish, but override the final buffer size + /// + /// This is useful after writing data directly into the builder memory + /// without calling the Append methods (basically, when using TypedBufferBuilder + /// only for memory allocation). 
+ Result> FinishWithLength(int64_t final_length, + bool shrink_to_fit = true) { + const auto final_byte_length = BitUtil::BytesForBits(final_length); + bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length()); + bit_length_ = false_count_ = 0; + return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit); + } + void Reset() { bytes_builder_.Reset(); bit_length_ = false_count_ = 0; diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 02b96c3b493..4295d4ca692 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -653,18 +653,77 @@ TEST(TestBufferBuilder, ResizeReserve) { ASSERT_OK(builder.Resize(128)); ASSERT_EQ(128, builder.capacity()); + ASSERT_EQ(9, builder.length()); // Do not shrink to fit ASSERT_OK(builder.Resize(64, false)); ASSERT_EQ(128, builder.capacity()); + ASSERT_EQ(9, builder.length()); // Shrink to fit ASSERT_OK(builder.Resize(64)); ASSERT_EQ(64, builder.capacity()); + ASSERT_EQ(9, builder.length()); // Reserve elements ASSERT_OK(builder.Reserve(60)); ASSERT_EQ(128, builder.capacity()); + ASSERT_EQ(9, builder.length()); +} + +TEST(TestBufferBuilder, Finish) { + const std::string data = "some data"; + auto data_ptr = data.c_str(); + + for (const bool shrink_to_fit : {true, false}) { + ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit); + BufferBuilder builder; + ASSERT_OK(builder.Append(data_ptr, 9)); + ASSERT_OK(builder.Append(data_ptr, 9)); + ASSERT_EQ(18, builder.length()); + ASSERT_EQ(64, builder.capacity()); + + ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit)); + ASSERT_EQ(buf->size(), 18); + ASSERT_EQ(buf->capacity(), 64); + } + for (const bool shrink_to_fit : {true, false}) { + ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit); + BufferBuilder builder; + ASSERT_OK(builder.Reserve(1024)); + builder.UnsafeAppend(data_ptr, 9); + builder.UnsafeAppend(data_ptr, 9); + ASSERT_EQ(18, builder.length()); + ASSERT_EQ(builder.capacity(), 1024); + + 
ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit)); + ASSERT_EQ(buf->size(), 18); + ASSERT_EQ(buf->capacity(), shrink_to_fit ? 64 : 1024); + } +} + +TEST(TestBufferBuilder, FinishEmpty) { + for (const bool shrink_to_fit : {true, false}) { + ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit); + BufferBuilder builder; + ASSERT_EQ(0, builder.length()); + ASSERT_EQ(0, builder.capacity()); + + ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit)); + ASSERT_EQ(buf->size(), 0); + ASSERT_EQ(buf->capacity(), 0); + } + for (const bool shrink_to_fit : {true, false}) { + ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit); + BufferBuilder builder; + ASSERT_OK(builder.Reserve(1024)); + ASSERT_EQ(0, builder.length()); + ASSERT_EQ(1024, builder.capacity()); + + ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit)); + ASSERT_EQ(buf->size(), 0); + ASSERT_EQ(buf->capacity(), shrink_to_fit ? 0 : 1024); + } } template @@ -717,7 +776,7 @@ TYPED_TEST(TypedTestBufferBuilder, AppendCopies) { } } -TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) { +TEST(TestBoolBufferBuilder, Basics) { TypedBufferBuilder builder; ASSERT_OK(builder.Append(false)); @@ -746,7 +805,7 @@ TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) { ASSERT_EQ(built->size(), BitUtil::BytesForBits(nvalues + 1)); } -TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) { +TEST(TestBoolBufferBuilder, AppendCopies) { TypedBufferBuilder builder; ASSERT_OK(builder.Append(13, true)); @@ -766,6 +825,21 @@ TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) { ASSERT_EQ(built->size(), BitUtil::BytesForBits(13 + 17)); } +TEST(TestBoolBufferBuilder, Reserve) { + TypedBufferBuilder builder; + + ASSERT_OK(builder.Reserve(13 + 17)); + builder.UnsafeAppend(13, true); + builder.UnsafeAppend(17, false); + ASSERT_EQ(builder.length(), 13 + 17); + ASSERT_EQ(builder.capacity(), 64 * 8); + ASSERT_EQ(builder.false_count(), 17); + + ASSERT_OK_AND_ASSIGN(auto built, builder.Finish()); + 
AssertIsCPUBuffer(*built); + ASSERT_EQ(built->size(), BitUtil::BytesForBits(13 + 17)); +} + template class TypedTestBuffer : public ::testing::Test {}; diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 5cb3e577235..a43bf8104f2 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -984,11 +984,11 @@ struct SchemaImporter { if (prec_scale.size() != 2 && prec_scale.size() != 3) { return f_parser_.Invalid(); } - if (prec_scale[0] <= 0 || prec_scale[1] <= 0) { + if (prec_scale[0] <= 0) { return f_parser_.Invalid(); } if (prec_scale.size() == 2 || prec_scale[2] == 128) { - type_ = decimal(prec_scale[0], prec_scale[1]); + type_ = decimal128(prec_scale[0], prec_scale[1]); } else if (prec_scale[2] == 256) { type_ = decimal256(prec_scale[0], prec_scale[1]); } else { diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 317fd01f17c..54ce0efcf9d 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -283,6 +283,12 @@ TEST_F(TestSchemaExport, Primitive) { TestPrimitive(decimal(16, 4), "d:16,4"); TestPrimitive(decimal256(16, 4), "d:16,4,256"); + + TestPrimitive(decimal(15, 0), "d:15,0"); + TestPrimitive(decimal256(15, 0), "d:15,0,256"); + + TestPrimitive(decimal(15, -4), "d:15,-4"); + TestPrimitive(decimal256(15, -4), "d:15,-4,256"); } TEST_F(TestSchemaExport, Temporal) { @@ -1196,6 +1202,20 @@ TEST_F(TestSchemaImport, Primitive) { CheckImport(field("", decimal128(16, 4))); FillPrimitive("d:16,4,256"); CheckImport(field("", decimal256(16, 4))); + + FillPrimitive("d:16,0"); + CheckImport(field("", decimal128(16, 0))); + FillPrimitive("d:16,0,128"); + CheckImport(field("", decimal128(16, 0))); + FillPrimitive("d:16,0,256"); + CheckImport(field("", decimal256(16, 0))); + + FillPrimitive("d:16,-4"); + CheckImport(field("", decimal128(16, -4))); + FillPrimitive("d:16,-4,128"); + CheckImport(field("", decimal128(16, -4))); + FillPrimitive("d:16,-4,256"); + CheckImport(field("", 
decimal256(16, -4))); } TEST_F(TestSchemaImport, Temporal) { @@ -1395,6 +1415,8 @@ TEST_F(TestSchemaImport, FormatStringError) { CheckImportError(); FillPrimitive("d:15.4"); CheckImportError(); + FillPrimitive("d:15,z"); + CheckImportError(); FillPrimitive("t"); CheckImportError(); FillPrimitive("td"); @@ -2382,9 +2404,12 @@ TEST_F(TestSchemaRoundtrip, Primitive) { TestWithTypeFactory(boolean); TestWithTypeFactory(float16); - TestWithTypeFactory(std::bind(decimal, 19, 4)); TestWithTypeFactory(std::bind(decimal128, 19, 4)); TestWithTypeFactory(std::bind(decimal256, 19, 4)); + TestWithTypeFactory(std::bind(decimal128, 19, 0)); + TestWithTypeFactory(std::bind(decimal256, 19, 0)); + TestWithTypeFactory(std::bind(decimal128, 19, -5)); + TestWithTypeFactory(std::bind(decimal256, 19, -5)); TestWithTypeFactory(std::bind(fixed_size_binary, 3)); TestWithTypeFactory(binary); TestWithTypeFactory(large_utf8); diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index b259b05d7cf..142bd0d8c89 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -118,6 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr& other) const { return Equals(*other.get()); } +bool ChunkedArray::ApproxEquals(const ChunkedArray& other, + const EqualOptions& equal_options) const { + if (length_ != other.length()) { + return false; + } + if (null_count_ != other.null_count()) { + return false; + } + // We cannot toggle check_metadata here yet, so we don't check it + if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { + return false; + } + + // Check contents of the underlying arrays. This checks for equality of + // the underlying data independently of the chunk size. 
+ return internal::ApplyBinaryChunked( + *this, other, + [&](const Array& left_piece, const Array& right_piece, + int64_t ARROW_ARG_UNUSED(position)) { + if (!left_piece.ApproxEquals(right_piece, equal_options)) { + return Status::Invalid("Unequal piece"); + } + return Status::OK(); + }) + .ok(); +} + std::shared_ptr ChunkedArray::Slice(int64_t offset, int64_t length) const { ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length"; bool offset_equals_length = offset == length_; diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index 5c0dda91850..2ace045c2bf 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -23,6 +23,7 @@ #include #include +#include "arrow/compare.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -136,6 +137,9 @@ class ARROW_EXPORT ChunkedArray { bool Equals(const ChunkedArray& other) const; /// \brief Determine if two chunked arrays are equal. bool Equals(const std::shared_ptr& other) const; + /// \brief Determine if two chunked arrays approximately equal + bool ApproxEquals(const ChunkedArray& other, + const EqualOptions& = EqualOptions::Defaults()) const; /// \return PrettyPrint representation suitable for debugging std::string ToString() const; diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index e781dff90e2..897dc32f357 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -68,3 +68,5 @@ add_arrow_compute_test(internals_test add_arrow_benchmark(function_benchmark PREFIX "arrow-compute") add_subdirectory(kernels) + +add_subdirectory(exec) diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc index 5afa1048960..1b00c366bfd 100644 --- a/cpp/src/arrow/compute/api_aggregate.cc +++ b/cpp/src/arrow/compute/api_aggregate.cc @@ -18,35 +18,151 @@ #include "arrow/compute/api_aggregate.h" #include "arrow/compute/exec.h" 
+#include "arrow/compute/function_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/compute/util_internal.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" namespace arrow { + +namespace internal { +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "QuantileOptions::Interpolation"; } + static std::string value_name(compute::QuantileOptions::Interpolation value) { + switch (value) { + case compute::QuantileOptions::LINEAR: + return "LINEAR"; + case compute::QuantileOptions::LOWER: + return "LOWER"; + case compute::QuantileOptions::HIGHER: + return "HIGHER"; + case compute::QuantileOptions::NEAREST: + return "NEAREST"; + case compute::QuantileOptions::MIDPOINT: + return "MIDPOINT"; + } + return ""; + } +}; +} // namespace internal + namespace compute { +// ---------------------------------------------------------------------- +// Function options + +using ::arrow::internal::checked_cast; + +namespace internal { +namespace { +using ::arrow::internal::DataMember; +static auto kScalarAggregateOptionsType = GetFunctionOptionsType( + DataMember("skip_nulls", &ScalarAggregateOptions::skip_nulls), + DataMember("min_count", &ScalarAggregateOptions::min_count)); +static auto kModeOptionsType = + GetFunctionOptionsType(DataMember("n", &ModeOptions::n)); +static auto kVarianceOptionsType = + GetFunctionOptionsType(DataMember("ddof", &VarianceOptions::ddof)); +static auto kQuantileOptionsType = GetFunctionOptionsType( + DataMember("q", &QuantileOptions::q), + DataMember("interpolation", &QuantileOptions::interpolation)); +static auto kTDigestOptionsType = GetFunctionOptionsType( + DataMember("q", &TDigestOptions::q), DataMember("delta", &TDigestOptions::delta), + DataMember("buffer_size", &TDigestOptions::buffer_size)); +static auto kIndexOptionsType = + GetFunctionOptionsType(DataMember("value", &IndexOptions::value)); +} // namespace +} // namespace internal + 
+ScalarAggregateOptions::ScalarAggregateOptions(bool skip_nulls, uint32_t min_count) + : FunctionOptions(internal::kScalarAggregateOptionsType), + skip_nulls(skip_nulls), + min_count(min_count) {} +constexpr char ScalarAggregateOptions::kTypeName[]; + +ModeOptions::ModeOptions(int64_t n) : FunctionOptions(internal::kModeOptionsType), n(n) {} +constexpr char ModeOptions::kTypeName[]; + +VarianceOptions::VarianceOptions(int ddof) + : FunctionOptions(internal::kVarianceOptionsType), ddof(ddof) {} +constexpr char VarianceOptions::kTypeName[]; + +QuantileOptions::QuantileOptions(double q, enum Interpolation interpolation) + : FunctionOptions(internal::kQuantileOptionsType), + q{q}, + interpolation{interpolation} {} +QuantileOptions::QuantileOptions(std::vector q, enum Interpolation interpolation) + : FunctionOptions(internal::kQuantileOptionsType), + q{std::move(q)}, + interpolation{interpolation} {} +constexpr char QuantileOptions::kTypeName[]; + +TDigestOptions::TDigestOptions(double q, uint32_t delta, uint32_t buffer_size) + : FunctionOptions(internal::kTDigestOptionsType), + q{q}, + delta{delta}, + buffer_size{buffer_size} {} +TDigestOptions::TDigestOptions(std::vector q, uint32_t delta, + uint32_t buffer_size) + : FunctionOptions(internal::kTDigestOptionsType), + q{std::move(q)}, + delta{delta}, + buffer_size{buffer_size} {} +constexpr char TDigestOptions::kTypeName[]; + +IndexOptions::IndexOptions(std::shared_ptr value) + : FunctionOptions(internal::kIndexOptionsType), value{std::move(value)} {} +IndexOptions::IndexOptions() : IndexOptions(std::make_shared()) {} +constexpr char IndexOptions::kTypeName[]; + +namespace internal { +void RegisterAggregateOptions(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunctionOptionsType(kScalarAggregateOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType)); + 
DCHECK_OK(registry->AddFunctionOptionsType(kTDigestOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kIndexOptionsType)); +} +} // namespace internal + // ---------------------------------------------------------------------- // Scalar aggregates -Result Count(const Datum& value, CountOptions options, ExecContext* ctx) { +Result Count(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { return CallFunction("count", {value}, &options, ctx); } -Result Mean(const Datum& value, ExecContext* ctx) { - return CallFunction("mean", {value}, ctx); +Result Mean(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("mean", {value}, &options, ctx); } -Result Sum(const Datum& value, ExecContext* ctx) { - return CallFunction("sum", {value}, ctx); +Result Sum(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("sum", {value}, &options, ctx); } -Result MinMax(const Datum& value, const MinMaxOptions& options, ExecContext* ctx) { +Result MinMax(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { return CallFunction("min_max", {value}, &options, ctx); } -Result Any(const Datum& value, ExecContext* ctx) { - return CallFunction("any", {value}, ctx); +Result Any(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("any", {value}, &options, ctx); } -Result All(const Datum& value, ExecContext* ctx) { - return CallFunction("all", {value}, ctx); +Result All(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("all", {value}, &options, ctx); } Result Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) { @@ -73,5 +189,9 @@ Result TDigest(const Datum& value, const TDigestOptions& options, return CallFunction("tdigest", {value}, &options, ctx); } +Result Index(const Datum& value, const IndexOptions& options, 
ExecContext* ctx) { + return CallFunction("index", {value}, &options, ctx); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index ca118ec5678..d66d4f1517c 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -40,49 +40,27 @@ class ExecContext; /// \addtogroup compute-concrete-options /// @{ -/// \brief Control Count kernel behavior -/// -/// By default, all non-null values are counted. -struct ARROW_EXPORT CountOptions : public FunctionOptions { - enum Mode { - /// Count all non-null values. - COUNT_NON_NULL = 0, - /// Count all null values. - COUNT_NULL, - }; - - explicit CountOptions(enum Mode count_mode = COUNT_NON_NULL) : count_mode(count_mode) {} - - static CountOptions Defaults() { return CountOptions(COUNT_NON_NULL); } - - enum Mode count_mode; -}; - -/// \brief Control MinMax kernel behavior +/// \brief Control general scalar aggregate kernel behavior /// /// By default, null values are ignored -struct ARROW_EXPORT MinMaxOptions : public FunctionOptions { - enum Mode { - /// Skip null values - SKIP = 0, - /// Any nulls will result in null output - EMIT_NULL - }; - - explicit MinMaxOptions(enum Mode null_handling = SKIP) : null_handling(null_handling) {} - - static MinMaxOptions Defaults() { return MinMaxOptions{}; } +class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions { + public: + explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1); + constexpr static char const kTypeName[] = "ScalarAggregateOptions"; + static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; } - enum Mode null_handling; + bool skip_nulls; + uint32_t min_count; }; /// \brief Control Mode kernel behavior /// /// Returns top-n common values and counts. /// By default, returns the most common value and count. 
-struct ARROW_EXPORT ModeOptions : public FunctionOptions { - explicit ModeOptions(int64_t n = 1) : n(n) {} - +class ARROW_EXPORT ModeOptions : public FunctionOptions { + public: + explicit ModeOptions(int64_t n = 1); + constexpr static char const kTypeName[] = "ModeOptions"; static ModeOptions Defaults() { return ModeOptions{}; } int64_t n = 1; @@ -92,9 +70,10 @@ struct ARROW_EXPORT ModeOptions : public FunctionOptions { /// /// The divisor used in calculations is N - ddof, where N is the number of elements. /// By default, ddof is zero, and population variance or stddev is returned. -struct ARROW_EXPORT VarianceOptions : public FunctionOptions { - explicit VarianceOptions(int ddof = 0) : ddof(ddof) {} - +class ARROW_EXPORT VarianceOptions : public FunctionOptions { + public: + explicit VarianceOptions(int ddof = 0); + constexpr static char const kTypeName[] = "VarianceOptions"; static VarianceOptions Defaults() { return VarianceOptions{}; } int ddof = 0; @@ -103,7 +82,8 @@ struct ARROW_EXPORT VarianceOptions : public FunctionOptions { /// \brief Control Quantile kernel behavior /// /// By default, returns the median value. 
-struct ARROW_EXPORT QuantileOptions : public FunctionOptions { +class ARROW_EXPORT QuantileOptions : public FunctionOptions { + public: /// Interpolation method to use when quantile lies between two data points enum Interpolation { LINEAR = 0, @@ -113,13 +93,12 @@ struct ARROW_EXPORT QuantileOptions : public FunctionOptions { MIDPOINT, }; - explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR) - : q{q}, interpolation{interpolation} {} + explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR); explicit QuantileOptions(std::vector q, - enum Interpolation interpolation = LINEAR) - : q{std::move(q)}, interpolation{interpolation} {} + enum Interpolation interpolation = LINEAR); + constexpr static char const kTypeName[] = "QuantileOptions"; static QuantileOptions Defaults() { return QuantileOptions{}; } /// quantile must be between 0 and 1 inclusive @@ -130,15 +109,13 @@ struct ARROW_EXPORT QuantileOptions : public FunctionOptions { /// \brief Control TDigest approximate quantile kernel behavior /// /// By default, returns the median value. 
-struct ARROW_EXPORT TDigestOptions : public FunctionOptions { +class ARROW_EXPORT TDigestOptions : public FunctionOptions { + public: explicit TDigestOptions(double q = 0.5, uint32_t delta = 100, - uint32_t buffer_size = 500) - : q{q}, delta{delta}, buffer_size{buffer_size} {} - + uint32_t buffer_size = 500); explicit TDigestOptions(std::vector q, uint32_t delta = 100, - uint32_t buffer_size = 500) - : q{std::move(q)}, delta{delta}, buffer_size{buffer_size} {} - + uint32_t buffer_size = 500); + constexpr static char const kTypeName[] = "TDigestOptions"; static TDigestOptions Defaults() { return TDigestOptions{}; } /// quantile must be between 0 and 1 inclusive @@ -149,11 +126,22 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions { uint32_t buffer_size; }; +/// \brief Control Index kernel behavior +class ARROW_EXPORT IndexOptions : public FunctionOptions { + public: + explicit IndexOptions(std::shared_ptr value); + // Default constructor for serialization + IndexOptions(); + constexpr static char const kTypeName[] = "IndexOptions"; + + std::shared_ptr value; +}; + /// @} /// \brief Count non-null (or null) values in an array. /// -/// \param[in] options counting options, see CountOptions for more information +/// \param[in] options counting options, see ScalarAggregateOptions for more information /// \param[in] datum to count /// \param[in] ctx the function execution context, optional /// \return out resulting datum @@ -161,30 +149,40 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions { /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Count(const Datum& datum, CountOptions options = CountOptions::Defaults(), - ExecContext* ctx = NULLPTR); +Result Count( + const Datum& datum, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Compute the mean of a numeric array. 
/// /// \param[in] value datum to compute the mean, expecting Array +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return datum of the computed mean as a DoubleScalar /// /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Mean(const Datum& value, ExecContext* ctx = NULLPTR); +Result Mean( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Sum values of a numeric array. /// /// \param[in] value datum to sum, expecting Array or ChunkedArray +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return datum of the computed sum as a Scalar /// /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Sum(const Datum& value, ExecContext* ctx = NULLPTR); +Result Sum( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Calculate the min / max of a numeric array /// @@ -192,44 +190,59 @@ Result Sum(const Datum& value, ExecContext* ctx = NULLPTR); /// struct, where T is the input type /// /// \param[in] value input datum, expecting Array or ChunkedArray -/// \param[in] options see MinMaxOptions for more information +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return resulting datum as a struct scalar /// /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result MinMax(const Datum& value, - const MinMaxOptions& options = MinMaxOptions::Defaults(), - ExecContext* ctx = NULLPTR); +Result MinMax( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Test whether any element in a boolean array evaluates to true. 
/// /// This function returns true if any of the elements in the array evaluates -/// to true and false otherwise. Null values are skipped. +/// to true and false otherwise. Null values are ignored by default. +/// If null values are taken into account by setting ScalarAggregateOptions +/// parameter skip_nulls = false then Kleene logic is used. +/// See KleeneOr for more details on Kleene logic. /// /// \param[in] value input datum, expecting a boolean array +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return resulting datum as a BooleanScalar /// /// \since 3.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Any(const Datum& value, ExecContext* ctx = NULLPTR); +Result Any( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Test whether all elements in a boolean array evaluate to true. /// /// This function returns true if all of the elements in the array evaluate -/// to true and false otherwise. Null values are skipped. +/// to true and false otherwise. Null values are ignored by default. +/// If null values are taken into account by setting ScalarAggregateOptions +/// parameter skip_nulls = false then Kleene logic is used. +/// See KleeneAnd for more details on Kleene logic. 
/// /// \param[in] value input datum, expecting a boolean array +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return resulting datum as a BooleanScalar /// \since 3.0.0 /// \note API not yet finalized ARROW_EXPORT -Result All(const Datum& value, ExecContext* ctx = NULLPTR); +Result All( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Calculate the modal (most common) value of a numeric array /// @@ -306,6 +319,19 @@ Result TDigest(const Datum& value, const TDigestOptions& options = TDigestOptions::Defaults(), ExecContext* ctx = NULLPTR); +/// \brief Find the first index of a value in an array. +/// +/// \param[in] value The array to search. +/// \param[in] options The array to search for. See IndexOoptions. +/// \param[in] ctx the function execution context, optional +/// \return out a Scalar containing the index (or -1 if not found). +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Index(const Datum& value, const IndexOptions& options, + ExecContext* ctx = NULLPTR); + namespace internal { /// Internal use only: streaming group identifier. @@ -399,7 +425,7 @@ struct ARROW_EXPORT Aggregate { /// This will be replaced by streaming execution operators. 
ARROW_EXPORT Result GroupBy(const std::vector& arguments, const std::vector& keys, - const std::vector& aggregates, + const std::vector& aggregates, bool use_threads = false, ExecContext* ctx = default_exec_context()); } // namespace internal diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index f4696fbe02a..1feb4e7eee0 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -21,13 +21,287 @@ #include #include +#include "arrow/array/array_base.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/compute/util_internal.h" #include "arrow/status.h" #include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" namespace arrow { + +namespace internal { +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "JoinOptions::NullHandlingBehavior"; } + static std::string value_name(compute::JoinOptions::NullHandlingBehavior value) { + switch (value) { + case compute::JoinOptions::NullHandlingBehavior::EMIT_NULL: + return "EMIT_NULL"; + case compute::JoinOptions::NullHandlingBehavior::SKIP: + return "SKIP"; + case compute::JoinOptions::NullHandlingBehavior::REPLACE: + return "REPLACE"; + } + return ""; + } +}; +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "TimeUnit::type"; } + static std::string value_name(TimeUnit::type value) { + switch (value) { + case TimeUnit::type::SECOND: + return "SECOND"; + case TimeUnit::type::MILLI: + return "MILLI"; + case TimeUnit::type::MICRO: + return "MICRO"; + case TimeUnit::type::NANO: + return "NANO"; + } + return ""; + } +}; +template <> +struct EnumTraits + : BasicEnumTraits< + compute::CompareOperator, compute::CompareOperator::EQUAL, + compute::CompareOperator::NOT_EQUAL, compute::CompareOperator::GREATER, + compute::CompareOperator::GREATER_EQUAL, 
compute::CompareOperator::LESS, + compute::CompareOperator::LESS_EQUAL> { + static std::string name() { return "compute::CompareOperator"; } + static std::string value_name(compute::CompareOperator value) { + switch (value) { + case compute::CompareOperator::EQUAL: + return "EQUAL"; + case compute::CompareOperator::NOT_EQUAL: + return "NOT_EQUAL"; + case compute::CompareOperator::GREATER: + return "GREATER"; + case compute::CompareOperator::GREATER_EQUAL: + return "GREATER_EQUAL"; + case compute::CompareOperator::LESS: + return "LESS"; + case compute::CompareOperator::LESS_EQUAL: + return "LESS_EQUAL"; + } + return ""; + } +}; +} // namespace internal + namespace compute { +// ---------------------------------------------------------------------- +// Function options + +using ::arrow::internal::checked_cast; + +namespace internal { +namespace { +using ::arrow::internal::DataMember; +static auto kArithmeticOptionsType = GetFunctionOptionsType( + DataMember("check_overflow", &ArithmeticOptions::check_overflow)); +static auto kElementWiseAggregateOptionsType = + GetFunctionOptionsType( + DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls)); +static auto kJoinOptionsType = GetFunctionOptionsType( + DataMember("null_handling", &JoinOptions::null_handling), + DataMember("null_replacement", &JoinOptions::null_replacement)); +static auto kMatchSubstringOptionsType = GetFunctionOptionsType( + DataMember("pattern", &MatchSubstringOptions::pattern), + DataMember("ignore_case", &MatchSubstringOptions::ignore_case)); +static auto kSplitOptionsType = GetFunctionOptionsType( + DataMember("max_splits", &SplitOptions::max_splits), + DataMember("reverse", &SplitOptions::reverse)); +static auto kSplitPatternOptionsType = GetFunctionOptionsType( + DataMember("pattern", &SplitPatternOptions::pattern), + DataMember("max_splits", &SplitPatternOptions::max_splits), + DataMember("reverse", &SplitPatternOptions::reverse)); +static auto kReplaceSliceOptionsType = 
GetFunctionOptionsType( + DataMember("start", &ReplaceSliceOptions::start), + DataMember("stop", &ReplaceSliceOptions::stop), + DataMember("replacement", &ReplaceSliceOptions::replacement)); +static auto kReplaceSubstringOptionsType = + GetFunctionOptionsType( + DataMember("pattern", &ReplaceSubstringOptions::pattern), + DataMember("replacement", &ReplaceSubstringOptions::replacement), + DataMember("max_replacements", &ReplaceSubstringOptions::max_replacements)); +static auto kExtractRegexOptionsType = GetFunctionOptionsType( + DataMember("pattern", &ExtractRegexOptions::pattern)); +static auto kSetLookupOptionsType = GetFunctionOptionsType( + DataMember("value_set", &SetLookupOptions::value_set), + DataMember("skip_nulls", &SetLookupOptions::skip_nulls)); +static auto kStrptimeOptionsType = GetFunctionOptionsType( + DataMember("format", &StrptimeOptions::format), + DataMember("unit", &StrptimeOptions::unit)); +static auto kPadOptionsType = GetFunctionOptionsType( + DataMember("width", &PadOptions::width), DataMember("padding", &PadOptions::padding)); +static auto kTrimOptionsType = GetFunctionOptionsType( + DataMember("characters", &TrimOptions::characters)); +static auto kSliceOptionsType = GetFunctionOptionsType( + DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop), + DataMember("step", &SliceOptions::step)); +static auto kMakeStructOptionsType = GetFunctionOptionsType( + DataMember("field_names", &MakeStructOptions::field_names), + DataMember("field_nullability", &MakeStructOptions::field_nullability), + DataMember("field_metadata", &MakeStructOptions::field_metadata)); +static auto kDayOfWeekOptionsType = GetFunctionOptionsType( + DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering), + DataMember("week_start", &DayOfWeekOptions::week_start)); +} // namespace +} // namespace internal + +ArithmeticOptions::ArithmeticOptions(bool check_overflow) + : FunctionOptions(internal::kArithmeticOptionsType), 
check_overflow(check_overflow) {} +constexpr char ArithmeticOptions::kTypeName[]; + +ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls) + : FunctionOptions(internal::kElementWiseAggregateOptionsType), + skip_nulls(skip_nulls) {} +constexpr char ElementWiseAggregateOptions::kTypeName[]; + +JoinOptions::JoinOptions(NullHandlingBehavior null_handling, std::string null_replacement) + : FunctionOptions(internal::kJoinOptionsType), + null_handling(null_handling), + null_replacement(std::move(null_replacement)) {} +constexpr char JoinOptions::kTypeName[]; + +MatchSubstringOptions::MatchSubstringOptions(std::string pattern, bool ignore_case) + : FunctionOptions(internal::kMatchSubstringOptionsType), + pattern(std::move(pattern)), + ignore_case(ignore_case) {} +MatchSubstringOptions::MatchSubstringOptions() : MatchSubstringOptions("", false) {} +constexpr char MatchSubstringOptions::kTypeName[]; + +SplitOptions::SplitOptions(int64_t max_splits, bool reverse) + : FunctionOptions(internal::kSplitOptionsType), + max_splits(max_splits), + reverse(reverse) {} +constexpr char SplitOptions::kTypeName[]; + +SplitPatternOptions::SplitPatternOptions(std::string pattern, int64_t max_splits, + bool reverse) + : FunctionOptions(internal::kSplitPatternOptionsType), + pattern(std::move(pattern)), + max_splits(max_splits), + reverse(reverse) {} +SplitPatternOptions::SplitPatternOptions() : SplitPatternOptions("", -1, false) {} +constexpr char SplitPatternOptions::kTypeName[]; + +ReplaceSliceOptions::ReplaceSliceOptions(int64_t start, int64_t stop, + std::string replacement) + : FunctionOptions(internal::kReplaceSliceOptionsType), + start(start), + stop(stop), + replacement(std::move(replacement)) {} +ReplaceSliceOptions::ReplaceSliceOptions() : ReplaceSliceOptions(0, 0, "") {} +constexpr char ReplaceSliceOptions::kTypeName[]; + +ReplaceSubstringOptions::ReplaceSubstringOptions(std::string pattern, + std::string replacement, + int64_t max_replacements) + : 
FunctionOptions(internal::kReplaceSubstringOptionsType), + pattern(std::move(pattern)), + replacement(std::move(replacement)), + max_replacements(max_replacements) {} +ReplaceSubstringOptions::ReplaceSubstringOptions() + : ReplaceSubstringOptions("", "", -1) {} +constexpr char ReplaceSubstringOptions::kTypeName[]; + +ExtractRegexOptions::ExtractRegexOptions(std::string pattern) + : FunctionOptions(internal::kExtractRegexOptionsType), pattern(std::move(pattern)) {} +ExtractRegexOptions::ExtractRegexOptions() : ExtractRegexOptions("") {} +constexpr char ExtractRegexOptions::kTypeName[]; + +SetLookupOptions::SetLookupOptions(Datum value_set, bool skip_nulls) + : FunctionOptions(internal::kSetLookupOptionsType), + value_set(std::move(value_set)), + skip_nulls(skip_nulls) {} +SetLookupOptions::SetLookupOptions() : SetLookupOptions({}, false) {} +constexpr char SetLookupOptions::kTypeName[]; + +StrptimeOptions::StrptimeOptions(std::string format, TimeUnit::type unit) + : FunctionOptions(internal::kStrptimeOptionsType), + format(std::move(format)), + unit(unit) {} +StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {} +constexpr char StrptimeOptions::kTypeName[]; + +PadOptions::PadOptions(int64_t width, std::string padding) + : FunctionOptions(internal::kPadOptionsType), + width(width), + padding(std::move(padding)) {} +PadOptions::PadOptions() : PadOptions(0, " ") {} +constexpr char PadOptions::kTypeName[]; + +TrimOptions::TrimOptions(std::string characters) + : FunctionOptions(internal::kTrimOptionsType), characters(std::move(characters)) {} +TrimOptions::TrimOptions() : TrimOptions("") {} +constexpr char TrimOptions::kTypeName[]; + +SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step) + : FunctionOptions(internal::kSliceOptionsType), + start(start), + stop(stop), + step(step) {} +SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {} +constexpr char SliceOptions::kTypeName[]; + +MakeStructOptions::MakeStructOptions( + 
std::vector n, std::vector r, + std::vector> m) + : FunctionOptions(internal::kMakeStructOptionsType), + field_names(std::move(n)), + field_nullability(std::move(r)), + field_metadata(std::move(m)) {} + +MakeStructOptions::MakeStructOptions(std::vector n) + : FunctionOptions(internal::kMakeStructOptionsType), + field_names(std::move(n)), + field_nullability(field_names.size(), true), + field_metadata(field_names.size(), NULLPTR) {} + +MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector()) {} +constexpr char MakeStructOptions::kTypeName[]; + +DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start) + : FunctionOptions(internal::kDayOfWeekOptionsType), + one_based_numbering(one_based_numbering), + week_start(week_start) {} +constexpr char DayOfWeekOptions::kTypeName[]; + +namespace internal { +void RegisterScalarOptions(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSplitOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSplitPatternOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSliceOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSubstringOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kExtractRegexOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSetLookupOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kStrptimeOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType)); + 
DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType)); +} +} // namespace internal + #define SCALAR_EAGER_UNARY(NAME, REGISTRY_NAME) \ Result NAME(const Datum& value, ExecContext* ctx) { \ return CallFunction(REGISTRY_NAME, {value}, ctx); \ @@ -41,6 +315,26 @@ namespace compute { // ---------------------------------------------------------------------- // Arithmetic +#define SCALAR_ARITHMETIC_UNARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \ + Result NAME(const Datum& arg, ArithmeticOptions options, ExecContext* ctx) { \ + auto func_name = (options.check_overflow) ? REGISTRY_CHECKED_NAME : REGISTRY_NAME; \ + return CallFunction(func_name, {arg}, ctx); \ + } + +SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked") +SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked") +SCALAR_EAGER_UNARY(Sign, "sign") +SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked") +SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked") +SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked") +SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked") +SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked") +SCALAR_EAGER_UNARY(Atan, "atan") +SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked") +SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked") +SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked") +SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked") + #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \ Result NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \ ExecContext* ctx) { \ @@ -52,6 +346,23 @@ SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked") SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked") SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked") SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked") +SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked") +SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked") +SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", 
"shift_right_checked") +SCALAR_EAGER_BINARY(Atan2, "atan2") +SCALAR_EAGER_UNARY(Floor, "floor") +SCALAR_EAGER_UNARY(Ceil, "ceil") +SCALAR_EAGER_UNARY(Trunc, "trunc") + +Result MaxElementWise(const std::vector& args, + ElementWiseAggregateOptions options, ExecContext* ctx) { + return CallFunction("max_element_wise", args, &options, ctx); +} + +Result MinElementWise(const std::vector& args, + ElementWiseAggregateOptions options, ExecContext* ctx) { + return CallFunction("min_element_wise", args, &options, ctx); +} // ---------------------------------------------------------------------- // Set-related operations @@ -133,7 +444,7 @@ Result Compare(const Datum& left, const Datum& right, CompareOptions opti func_name = "less_equal"; break; } - return CallFunction(func_name, {left, right}, &options, ctx); + return CallFunction(func_name, {left, right}, nullptr, ctx); } // ---------------------------------------------------------------------- @@ -147,5 +458,41 @@ Result FillNull(const Datum& values, const Datum& fill_value, ExecContext return CallFunction("fill_null", {values, fill_value}, ctx); } +Result IfElse(const Datum& cond, const Datum& if_true, const Datum& if_false, + ExecContext* ctx) { + return CallFunction("if_else", {cond, if_true, if_false}, ctx); +} + +Result CaseWhen(const Datum& cond, const std::vector& cases, + ExecContext* ctx) { + std::vector args = {cond}; + args.reserve(cases.size() + 1); + args.insert(args.end(), cases.begin(), cases.end()); + return CallFunction("case_when", args, ctx); +} + +// ---------------------------------------------------------------------- +// Temporal functions + +SCALAR_EAGER_UNARY(Year, "year") +SCALAR_EAGER_UNARY(Month, "month") +SCALAR_EAGER_UNARY(Day, "day") +SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") +SCALAR_EAGER_UNARY(ISOYear, "iso_year") +SCALAR_EAGER_UNARY(ISOWeek, "iso_week") +SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar") +SCALAR_EAGER_UNARY(Quarter, "quarter") +SCALAR_EAGER_UNARY(Hour, "hour") 
+SCALAR_EAGER_UNARY(Minute, "minute") +SCALAR_EAGER_UNARY(Second, "second") +SCALAR_EAGER_UNARY(Millisecond, "millisecond") +SCALAR_EAGER_UNARY(Microsecond, "microsecond") +SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") +SCALAR_EAGER_UNARY(Subsecond, "subsecond") + +Result DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) { + return CallFunction("day_of_week", {arg}, &options, ctx); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index f59426d8f1b..e07e41569a1 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -37,21 +37,58 @@ namespace compute { /// /// @{ -struct ArithmeticOptions : public FunctionOptions { - ArithmeticOptions() : check_overflow(false) {} +class ARROW_EXPORT ArithmeticOptions : public FunctionOptions { + public: + explicit ArithmeticOptions(bool check_overflow = false); + constexpr static char const kTypeName[] = "ArithmeticOptions"; bool check_overflow; }; -struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions { - explicit MatchSubstringOptions(std::string pattern) : pattern(std::move(pattern)) {} +class ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions { + public: + explicit ElementWiseAggregateOptions(bool skip_nulls = true); + constexpr static char const kTypeName[] = "ElementWiseAggregateOptions"; + static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; } + + bool skip_nulls; +}; + +/// Options for var_args_join. +class ARROW_EXPORT JoinOptions : public FunctionOptions { + public: + /// How to handle null values. (A null separator always results in a null output.) + enum NullHandlingBehavior { + /// A null in any input results in a null in the output. + EMIT_NULL, + /// Nulls in inputs are skipped. + SKIP, + /// Nulls in inputs are replaced with the replacement string. 
+ REPLACE, + }; + explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL, + std::string null_replacement = ""); + constexpr static char const kTypeName[] = "JoinOptions"; + static JoinOptions Defaults() { return JoinOptions(); } + NullHandlingBehavior null_handling; + std::string null_replacement; +}; + +class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions { + public: + explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false); + MatchSubstringOptions(); + constexpr static char const kTypeName[] = "MatchSubstringOptions"; /// The exact substring (or regex, depending on kernel) to look for inside input values. std::string pattern; + /// Whether to perform a case-insensitive match. + bool ignore_case = false; }; -struct ARROW_EXPORT SplitOptions : public FunctionOptions { - explicit SplitOptions(int64_t max_splits = -1, bool reverse = false) - : max_splits(max_splits), reverse(reverse) {} +class ARROW_EXPORT SplitOptions : public FunctionOptions { + public: + explicit SplitOptions(int64_t max_splits = -1, bool reverse = false); + constexpr static char const kTypeName[] = "SplitOptions"; /// Maximum number of splits allowed, or unlimited when -1 int64_t max_splits; @@ -59,19 +96,41 @@ struct ARROW_EXPORT SplitOptions : public FunctionOptions { bool reverse; }; -struct ARROW_EXPORT SplitPatternOptions : public SplitOptions { +class ARROW_EXPORT SplitPatternOptions : public FunctionOptions { + public: explicit SplitPatternOptions(std::string pattern, int64_t max_splits = -1, - bool reverse = false) - : SplitOptions(max_splits, reverse), pattern(std::move(pattern)) {} + bool reverse = false); + SplitPatternOptions(); + constexpr static char const kTypeName[] = "SplitPatternOptions"; - /// The exact substring to look for inside input values. + /// The exact substring to split on. 
std::string pattern; + /// Maximum number of splits allowed, or unlimited when -1 + int64_t max_splits; + /// Start splitting from the end of the string (only relevant when max_splits != -1) + bool reverse; +}; + +class ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions { + public: + explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement); + ReplaceSliceOptions(); + constexpr static char const kTypeName[] = "ReplaceSliceOptions"; + + /// Index to start slicing at + int64_t start; + /// Index to stop slicing at + int64_t stop; + /// String to replace the slice with + std::string replacement; }; -struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions { +class ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions { + public: explicit ReplaceSubstringOptions(std::string pattern, std::string replacement, - int64_t max_replacements = -1) - : pattern(pattern), replacement(replacement), max_replacements(max_replacements) {} + int64_t max_replacements = -1); + ReplaceSubstringOptions(); + constexpr static char const kTypeName[] = "ReplaceSubstringOptions"; /// Pattern to match, literal, or regular expression depending on which kernel is used std::string pattern; @@ -81,10 +140,22 @@ struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions { int64_t max_replacements; }; +class ARROW_EXPORT ExtractRegexOptions : public FunctionOptions { + public: + explicit ExtractRegexOptions(std::string pattern); + ExtractRegexOptions(); + constexpr static char const kTypeName[] = "ExtractRegexOptions"; + + /// Regular expression with named capture fields + std::string pattern; +}; + /// Options for IsIn and IndexIn functions -struct ARROW_EXPORT SetLookupOptions : public FunctionOptions { - explicit SetLookupOptions(Datum value_set, bool skip_nulls = false) - : value_set(std::move(value_set)), skip_nulls(skip_nulls) {} +class ARROW_EXPORT SetLookupOptions : public FunctionOptions { + public: + explicit 
SetLookupOptions(Datum value_set, bool skip_nulls = false); + SetLookupOptions(); + constexpr static char const kTypeName[] = "SetLookupOptions"; /// The set of values to look up input values into. Datum value_set; @@ -97,21 +168,47 @@ struct ARROW_EXPORT SetLookupOptions : public FunctionOptions { bool skip_nulls; }; -struct ARROW_EXPORT StrptimeOptions : public FunctionOptions { - explicit StrptimeOptions(std::string format, TimeUnit::type unit) - : format(std::move(format)), unit(unit) {} +class ARROW_EXPORT StrptimeOptions : public FunctionOptions { + public: + explicit StrptimeOptions(std::string format, TimeUnit::type unit); + StrptimeOptions(); + constexpr static char const kTypeName[] = "StrptimeOptions"; std::string format; TimeUnit::type unit; }; -struct ARROW_EXPORT TrimOptions : public FunctionOptions { - explicit TrimOptions(std::string characters) : characters(std::move(characters)) {} +class ARROW_EXPORT PadOptions : public FunctionOptions { + public: + explicit PadOptions(int64_t width, std::string padding = " "); + PadOptions(); + constexpr static char const kTypeName[] = "PadOptions"; + + /// The desired string length. + int64_t width; + /// What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII). + std::string padding; +}; + +class ARROW_EXPORT TrimOptions : public FunctionOptions { + public: + explicit TrimOptions(std::string characters); + TrimOptions(); + constexpr static char const kTypeName[] = "TrimOptions"; /// The individual characters that can be trimmed from the string. 
std::string characters; }; +class ARROW_EXPORT SliceOptions : public FunctionOptions { + public: + explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits::max(), + int64_t step = 1); + SliceOptions(); + constexpr static char const kTypeName[] = "SliceOptions"; + int64_t start, stop, step; +}; + enum CompareOperator : int8_t { EQUAL, NOT_EQUAL, @@ -121,23 +218,19 @@ enum CompareOperator : int8_t { LESS_EQUAL, }; -struct CompareOptions : public FunctionOptions { +struct ARROW_EXPORT CompareOptions { explicit CompareOptions(CompareOperator op) : op(op) {} - + CompareOptions() : CompareOptions(CompareOperator::EQUAL) {} enum CompareOperator op; }; -struct ARROW_EXPORT ProjectOptions : public FunctionOptions { - ProjectOptions(std::vector n, std::vector r, - std::vector> m) - : field_names(std::move(n)), - field_nullability(std::move(r)), - field_metadata(std::move(m)) {} - - explicit ProjectOptions(std::vector n) - : field_names(std::move(n)), - field_nullability(field_names.size(), true), - field_metadata(field_names.size(), NULLPTR) {} +class ARROW_EXPORT MakeStructOptions : public FunctionOptions { + public: + MakeStructOptions(std::vector n, std::vector r, + std::vector> m); + explicit MakeStructOptions(std::vector n); + MakeStructOptions(); + constexpr static char const kTypeName[] = "MakeStructOptions"; /// Names for wrapped columns std::vector field_names; @@ -149,8 +242,33 @@ struct ARROW_EXPORT ProjectOptions : public FunctionOptions { std::vector> field_metadata; }; +struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions { + public: + explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1); + constexpr static char const kTypeName[] = "DayOfWeekOptions"; + static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; } + + /// Number days from 1 if true and from 0 if false + bool one_based_numbering; + /// What day does the week start with (Monday=1, Sunday=7) + uint32_t week_start; +}; + /// @} +/// 
\brief Get the absolute value of a value. +/// +/// If argument is null the result will be null. +/// +/// \param[in] arg the value transformed +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise absolute value +ARROW_EXPORT +Result AbsoluteValue(const Datum& arg, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + /// \brief Add two values together. Array values must be the same length. If /// either addend is null the result will be null. /// @@ -204,6 +322,233 @@ Result Divide(const Datum& left, const Datum& right, ArithmeticOptions options = ArithmeticOptions(), ExecContext* ctx = NULLPTR); +/// \brief Negate values. +/// +/// If argument is null the result will be null. +/// +/// \param[in] arg the value negated +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise negation +ARROW_EXPORT +Result Negate(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Raise the values of base array to the power of the exponent array values. +/// Array values must be the same length. If either base or exponent is null the result +/// will be null. +/// +/// \param[in] left the base +/// \param[in] right the exponent +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise base value raised to the power of exponent +ARROW_EXPORT +Result Power(const Datum& left, const Datum& right, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Left shift the left array by the right array. Array values must be the +/// same length. If either operand is null, the result will be null. 
+/// +/// \param[in] left the value to shift +/// \param[in] right the value to shift by +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise left value shifted left by the right value +ARROW_EXPORT +Result ShiftLeft(const Datum& left, const Datum& right, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Right shift the left array by the right array. Array values must be the +/// same length. If either operand is null, the result will be null. Performs a +/// logical shift for unsigned values, and an arithmetic shift for signed values. +/// +/// \param[in] left the value to shift +/// \param[in] right the value to shift by +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise left value shifted right by the right value +ARROW_EXPORT +Result ShiftRight(const Datum& left, const Datum& right, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the sine of the array values. +/// \param[in] arg The values to compute the sine for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise sine of the values +ARROW_EXPORT +Result Sin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the cosine of the array values. +/// \param[in] arg The values to compute the cosine for. 
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise cosine of the values +ARROW_EXPORT +Result Cos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse sine (arcsine) of the array values. +/// \param[in] arg The values to compute the inverse sine for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse sine of the values +ARROW_EXPORT +Result Asin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse cosine (arccosine) of the array values. +/// \param[in] arg The values to compute the inverse cosine for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse cosine of the values +ARROW_EXPORT +Result Acos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the tangent of the array values. +/// \param[in] arg The values to compute the tangent for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise tangent of the values +ARROW_EXPORT +Result Tan(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse tangent (arctangent) of the array values. +/// \param[in] arg The values to compute the inverse tangent for. 
+/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse tangent of the values +ARROW_EXPORT +Result Atan(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse tangent (arctangent) of y/x, using the +/// argument signs to determine the correct quadrant. +/// \param[in] y The y-values to compute the inverse tangent for. +/// \param[in] x The x-values to compute the inverse tangent for. +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse tangent of the values +ARROW_EXPORT +Result Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR); + +/// \brief Get the natural log of a value. +/// +/// If argument is null the result will be null. +/// +/// \param[in] arg The values to compute the logarithm for. +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise natural log +ARROW_EXPORT +Result Ln(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Get the log base 10 of a value. +/// +/// If argument is null the result will be null. +/// +/// \param[in] arg The values to compute the logarithm for. +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise log base 10 +ARROW_EXPORT +Result Log10(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Get the log base 2 of a value. +/// +/// If argument is null the result will be null. +/// +/// \param[in] arg The values to compute the logarithm for. 
+/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise log base 2 +ARROW_EXPORT +Result Log2(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Get the natural log of (1 + value). +/// +/// If argument is null the result will be null. +/// This function may be more accurate than Log(1 + value) for values close to zero. +/// +/// \param[in] arg The values to compute the logarithm for. +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise natural log +ARROW_EXPORT +Result Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Round to the nearest integer less than or equal in magnitude to the +/// argument. Array values can be of arbitrary length. If argument is null the +/// result will be null. +/// +/// \param[in] arg the value to round +/// \param[in] ctx the function execution context, optional +/// \return the rounded value +ARROW_EXPORT +Result Floor(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Round to the nearest integer greater than or equal in magnitude to the +/// argument. Array values can be of arbitrary length. If argument is null the +/// result will be null. +/// +/// \param[in] arg the value to round +/// \param[in] ctx the function execution context, optional +/// \return the rounded value +ARROW_EXPORT +Result Ceil(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Get the integral part without fractional digits. Array values can be +/// of arbitrary length. If argument is null the result will be null. 
+/// +/// \param[in] arg the value to truncate +/// \param[in] ctx the function execution context, optional +/// \return the truncated value +ARROW_EXPORT +Result Trunc(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Find the element-wise maximum of any number of arrays or scalars. +/// Array values must be the same length. +/// +/// \param[in] args arrays or scalars to operate on. +/// \param[in] options options for handling nulls, optional +/// \param[in] ctx the function execution context, optional +/// \return the element-wise maximum +ARROW_EXPORT +Result MaxElementWise( + const std::vector& args, + ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +/// \brief Find the element-wise minimum of any number of arrays or scalars. +/// Array values must be the same length. +/// +/// \param[in] args arrays or scalars to operate on. +/// \param[in] options options for handling nulls, optional +/// \param[in] ctx the function execution context, optional +/// \return the element-wise minimum +ARROW_EXPORT +Result MinElementWise( + const std::vector& args, + ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +/// \brief Get the sign of a value. Array values can be of arbitrary length. If argument +/// is null the result will be null. +/// +/// \param[in] arg the value to extract sign from +/// \param[in] ctx the function execution context, optional +/// \return the elementwise sign function +ARROW_EXPORT +Result Sign(const Datum& arg, ExecContext* ctx = NULLPTR); + /// \brief Compare a numeric array with a scalar. /// /// \param[in] left datum to compare, must be an Array @@ -217,9 +562,10 @@ Result Divide(const Datum& left, const Datum& right, /// /// \since 1.0.0 /// \note API not yet finalized +ARROW_DEPRECATED("Deprecated in 5.0.0. 
Use each compare function directly") ARROW_EXPORT -Result Compare(const Datum& left, const Datum& right, - struct CompareOptions options, ExecContext* ctx = NULLPTR); +Result Compare(const Datum& left, const Datum& right, CompareOptions options, + ExecContext* ctx = NULLPTR); /// \brief Invert the values of a boolean datum /// \param[in] value datum to invert @@ -416,5 +762,228 @@ ARROW_EXPORT Result FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief IfElse returns elements chosen from `left` or `right` +/// depending on `cond`. `null` values in `cond` will be promoted to the result +/// +/// \param[in] cond `Boolean` condition Scalar/ Array +/// \param[in] left Scalar/ Array +/// \param[in] right Scalar/ Array +/// \param[in] ctx the function execution context, optional +/// +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result IfElse(const Datum& cond, const Datum& left, const Datum& right, + ExecContext* ctx = NULLPTR); + +/// \brief CaseWhen behaves like a switch/case or if-else if-else statement: for +/// each row, select the first value for which the corresponding condition is +/// true, or (if given) select the 'else' value, else emit null. Note that a +/// null condition is the same as false. +/// +/// \param[in] cond Conditions (Boolean) +/// \param[in] cases Values (any type), along with an optional 'else' value. 
+/// \param[in] ctx the function execution context, optional +/// +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result CaseWhen(const Datum& cond, const std::vector& cases, + ExecContext* ctx = NULLPTR); + +/// \brief Year returns year for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month for each element of `values`. +/// Month is encoded as January=1, December=12 +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day number for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns number of the day of the week value for each element of +/// `values`. +/// +/// By default week starts on Monday denoted by 0 and ends on Sunday denoted +/// by 6. 
Start day of the week (Monday=1, Sunday=7) and numbering base (0 or 1) can be +/// set using DayOfWeekOptions +/// +/// \param[in] values input to extract number of the day of the week from +/// \param[in] options for setting start of the week and day numbering +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result DayOfWeek(const Datum& values, + DayOfWeekOptions options = DayOfWeekOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns number of day of the year for each element of `values`. +/// January 1st maps to day number 1, February 1st to 32, etc. +/// +/// \param[in] values input to extract number of day of the year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief ISOYear returns ISO year number for each element of `values`. +/// First week of an ISO year has the majority (4 or more) of its days in January. +/// +/// \param[in] values input to extract ISO year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief ISOWeek returns ISO week of year number for each element of `values`. +/// First ISO week has the majority (4 or more) of its days in January. +/// Week of the year starts with 1 and can run up to 53. 
+/// +/// \param[in] values input to extract ISO week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for +/// each element of `values`. +/// ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7. +/// +/// \param[in] values input to ISO calendar struct from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result ISOCalendar(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns the quarter of year number for each element of `values` +/// First quarter maps to 1 and fourth quarter maps to 4. +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of 
`values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns number of milliseconds since the last full second +/// for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns number of microseconds since the last full millisecond +/// for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns number of nanoseconds since the last full millisecond +/// for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Subsecond returns the fraction of second elapsed since last full second +/// as a float for each element of `values` +/// +/// \param[in] values input to extract subsecond from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result Subsecond(const Datum& values, ExecContext* ctx = NULLPTR); + } // namespace compute } // 
namespace arrow diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 0082d48112d..9f3b3fa71b3 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -18,23 +18,139 @@ #include "arrow/compute/api_vector.h" #include +#include #include #include #include "arrow/array/array_nested.h" #include "arrow/array/builder_primitive.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/registry.h" #include "arrow/datum.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" namespace arrow { +using internal::checked_cast; using internal::checked_pointer_cast; +namespace internal { +using compute::DictionaryEncodeOptions; +using compute::FilterOptions; +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "FilterOptions::NullSelectionBehavior"; } + static std::string value_name(FilterOptions::NullSelectionBehavior value) { + switch (value) { + case FilterOptions::DROP: + return "DROP"; + case FilterOptions::EMIT_NULL: + return "EMIT_NULL"; + } + return ""; + } +}; +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "DictionaryEncodeOptions::NullEncodingBehavior"; } + static std::string value_name(DictionaryEncodeOptions::NullEncodingBehavior value) { + switch (value) { + case DictionaryEncodeOptions::ENCODE: + return "ENCODE"; + case DictionaryEncodeOptions::MASK: + return "MASK"; + } + return ""; + } +}; +} // namespace internal + namespace compute { +// ---------------------------------------------------------------------- +// Function options + +bool SortKey::Equals(const SortKey& other) const { + return name == other.name && order == other.order; +} +std::string SortKey::ToString() const { + std::stringstream ss; + ss << name << ' '; + switch (order) { + case SortOrder::Ascending: + ss << "ASC"; + 
break; + case SortOrder::Descending: + ss << "DESC"; + break; + } + return ss.str(); +} + +namespace internal { +namespace { +using ::arrow::internal::DataMember; +static auto kFilterOptionsType = GetFunctionOptionsType( + DataMember("null_selection_behavior", &FilterOptions::null_selection_behavior)); +static auto kTakeOptionsType = GetFunctionOptionsType( + DataMember("boundscheck", &TakeOptions::boundscheck)); +static auto kDictionaryEncodeOptionsType = + GetFunctionOptionsType(DataMember( + "null_encoding_behavior", &DictionaryEncodeOptions::null_encoding_behavior)); +static auto kArraySortOptionsType = GetFunctionOptionsType( + DataMember("order", &ArraySortOptions::order)); +static auto kSortOptionsType = + GetFunctionOptionsType(DataMember("sort_keys", &SortOptions::sort_keys)); +static auto kPartitionNthOptionsType = GetFunctionOptionsType( + DataMember("pivot", &PartitionNthOptions::pivot)); +} // namespace +} // namespace internal + +FilterOptions::FilterOptions(NullSelectionBehavior null_selection) + : FunctionOptions(internal::kFilterOptionsType), + null_selection_behavior(null_selection) {} +constexpr char FilterOptions::kTypeName[]; + +TakeOptions::TakeOptions(bool boundscheck) + : FunctionOptions(internal::kTakeOptionsType), boundscheck(boundscheck) {} +constexpr char TakeOptions::kTypeName[]; + +DictionaryEncodeOptions::DictionaryEncodeOptions(NullEncodingBehavior null_encoding) + : FunctionOptions(internal::kDictionaryEncodeOptionsType), + null_encoding_behavior(null_encoding) {} +constexpr char DictionaryEncodeOptions::kTypeName[]; + +ArraySortOptions::ArraySortOptions(SortOrder order) + : FunctionOptions(internal::kArraySortOptionsType), order(order) {} +constexpr char ArraySortOptions::kTypeName[]; + +SortOptions::SortOptions(std::vector sort_keys) + : FunctionOptions(internal::kSortOptionsType), sort_keys(std::move(sort_keys)) {} +constexpr char SortOptions::kTypeName[]; + +PartitionNthOptions::PartitionNthOptions(int64_t pivot) + : 
FunctionOptions(internal::kPartitionNthOptionsType), pivot(pivot) {} +constexpr char PartitionNthOptions::kTypeName[]; + +namespace internal { +void RegisterVectorOptions(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kTakeOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType)); +} +} // namespace internal + // ---------------------------------------------------------------------- // Direct exec interface to kernels @@ -46,6 +162,11 @@ Result> NthToIndices(const Array& values, int64_t n, return result.make_array(); } +Result ReplaceWithMask(const Datum& values, const Datum& mask, + const Datum& replacements, ExecContext* ctx) { + return CallFunction("replace_with_mask", {values, mask, replacements}, ctx); +} + Result> SortIndices(const Array& values, SortOrder order, ExecContext* ctx) { ArraySortOptions options(order); @@ -115,45 +236,6 @@ Result> Take(const Array& values, const Array& indices, // ---------------------------------------------------------------------- // Deprecated functions -Result> Take(const ChunkedArray& values, - const Array& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(values), Datum(indices), options, ctx)); - return result.chunked_array(); -} - -Result> Take(const ChunkedArray& values, - const ChunkedArray& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(values), Datum(indices), options, ctx)); - return result.chunked_array(); -} - -Result> Take(const Array& values, - const ChunkedArray& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, 
Take(Datum(values), Datum(indices), options, ctx)); - return result.chunked_array(); -} - -Result> Take(const RecordBatch& batch, const Array& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(batch), Datum(indices), options, ctx)); - return result.record_batch(); -} - -Result> Take(const Table& table, const Array& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(table), Datum(indices), options, ctx)); - return result.table(); -} - -Result> Take(const Table& table, const ChunkedArray& indices, - const TakeOptions& options, ExecContext* ctx) { - ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(table), Datum(indices), options, ctx)); - return result.table(); -} - Result> SortToIndices(const Array& values, ExecContext* ctx) { return SortIndices(values, SortOrder::Ascending, ctx); } diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index d67568e1567..2d9522b0732 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include "arrow/compute/function.h" #include "arrow/datum.h" @@ -32,7 +33,8 @@ class ExecContext; /// \addtogroup compute-concrete-options /// @{ -struct FilterOptions : public FunctionOptions { +class ARROW_EXPORT FilterOptions : public FunctionOptions { + public: /// Configure the action taken when a slot of the selection mask is null enum NullSelectionBehavior { /// the corresponding filtered value will be removed in the output @@ -41,30 +43,27 @@ struct FilterOptions : public FunctionOptions { EMIT_NULL, }; - explicit FilterOptions(NullSelectionBehavior null_selection = DROP) - : null_selection_behavior(null_selection) {} - + explicit FilterOptions(NullSelectionBehavior null_selection = DROP); + constexpr static char const kTypeName[] = "FilterOptions"; static FilterOptions Defaults() { return FilterOptions(); } 
NullSelectionBehavior null_selection_behavior = DROP; }; -struct ARROW_EXPORT TakeOptions : public FunctionOptions { - explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {} - - bool boundscheck = true; +class ARROW_EXPORT TakeOptions : public FunctionOptions { + public: + explicit TakeOptions(bool boundscheck = true); + constexpr static char const kTypeName[] = "TakeOptions"; static TakeOptions BoundsCheck() { return TakeOptions(true); } static TakeOptions NoBoundsCheck() { return TakeOptions(false); } static TakeOptions Defaults() { return BoundsCheck(); } -}; -enum class SortOrder { - Ascending, - Descending, + bool boundscheck = true; }; /// \brief Options for the dictionary encode function -struct DictionaryEncodeOptions : public FunctionOptions { +class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions { + public: /// Configure how null values will be encoded enum NullEncodingBehavior { /// the null value will be added to the dictionary with a proper index @@ -73,18 +72,29 @@ struct DictionaryEncodeOptions : public FunctionOptions { MASK }; - explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK) - : null_encoding_behavior(null_encoding) {} - + explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK); + constexpr static char const kTypeName[] = "DictionaryEncodeOptions"; static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); } NullEncodingBehavior null_encoding_behavior = MASK; }; +enum class SortOrder { + Ascending, + Descending, +}; + /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices -struct ARROW_EXPORT SortKey { +class ARROW_EXPORT SortKey : public util::EqualityComparable { + public: explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending) - : name(name), order(order) {} + : name(std::move(name)), order(order) {} + + using util::EqualityComparable::Equals; + using util::EqualityComparable::operator==; + using 
util::EqualityComparable::operator!=; + bool Equals(const SortKey& other) const; + std::string ToString() const; /// The name of the sort column. std::string name; @@ -92,25 +102,30 @@ struct ARROW_EXPORT SortKey { SortOrder order; }; -struct ARROW_EXPORT ArraySortOptions : public FunctionOptions { - explicit ArraySortOptions(SortOrder order = SortOrder::Ascending) : order(order) {} - +class ARROW_EXPORT ArraySortOptions : public FunctionOptions { + public: + explicit ArraySortOptions(SortOrder order = SortOrder::Ascending); + constexpr static char const kTypeName[] = "ArraySortOptions"; static ArraySortOptions Defaults() { return ArraySortOptions{}; } SortOrder order; }; -struct ARROW_EXPORT SortOptions : public FunctionOptions { - explicit SortOptions(std::vector sort_keys = {}) : sort_keys(sort_keys) {} - +class ARROW_EXPORT SortOptions : public FunctionOptions { + public: + explicit SortOptions(std::vector sort_keys = {}); + constexpr static char const kTypeName[] = "SortOptions"; static SortOptions Defaults() { return SortOptions{}; } std::vector sort_keys; }; /// \brief Partitioning options for NthToIndices -struct ARROW_EXPORT PartitionNthOptions : public FunctionOptions { - explicit PartitionNthOptions(int64_t pivot) : pivot(pivot) {} +class ARROW_EXPORT PartitionNthOptions : public FunctionOptions { + public: + explicit PartitionNthOptions(int64_t pivot); + PartitionNthOptions() : PartitionNthOptions(0) {} + constexpr static char const kTypeName[] = "PartitionNthOptions"; /// The index into the equivalent sorted array of the partition pivot element. int64_t pivot; @@ -157,6 +172,23 @@ Result> GetTakeIndices( } // namespace internal +/// \brief ReplaceWithMask replaces each value in the array corresponding +/// to a true value in the mask with the next element from `replacements`. +/// +/// \param[in] values Array input to replace +/// \param[in] mask Array or Scalar of Boolean mask values +/// \param[in] replacements The replacement values to draw from. 
There must +/// be as many replacement values as true values in the mask. +/// \param[in] ctx the function execution context, optional +/// +/// \return the resulting datum +/// +/// \since 5.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result ReplaceWithMask(const Datum& values, const Datum& mask, + const Datum& replacements, ExecContext* ctx = NULLPTR); + /// \brief Take from an array of values at indices in another array /// /// The output array will be of the same type as the input values @@ -334,42 +366,6 @@ Result DictionaryEncode( // ---------------------------------------------------------------------- // Deprecated functions -ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version") -ARROW_EXPORT -Result> Take( - const ChunkedArray& values, const Array& indices, - const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR); - -ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version") -ARROW_EXPORT -Result> Take( - const ChunkedArray& values, const ChunkedArray& indices, - const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR); - -ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version") -ARROW_EXPORT -Result> Take( - const Array& values, const ChunkedArray& indices, - const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR); - -ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version") -ARROW_EXPORT -Result> Take( - const RecordBatch& batch, const Array& indices, - const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR); - -ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version") -ARROW_EXPORT -Result> Take(const Table& table, const Array& indices, - const TakeOptions& options = TakeOptions::Defaults(), - ExecContext* context = NULLPTR); - -ARROW_DEPRECATED("Deprecated in 1.0.0. 
Use Datum-based version") -ARROW_EXPORT -Result> Take(const Table& table, const ChunkedArray& indices, - const TakeOptions& options = TakeOptions::Defaults(), - ExecContext* context = NULLPTR); - ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()") ARROW_EXPORT Result> SortToIndices(const Array& values, diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc index 8a091f2355d..4de68ba8d90 100644 --- a/cpp/src/arrow/compute/cast.cc +++ b/cpp/src/arrow/compute/cast.cc @@ -18,6 +18,7 @@ #include "arrow/compute/cast.h" #include +#include #include #include #include @@ -26,10 +27,12 @@ #include "arrow/compute/cast_internal.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/registry.h" #include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" namespace arrow { @@ -38,6 +41,9 @@ using internal::ToTypeName; namespace compute { namespace internal { +// ---------------------------------------------------------------------- +// Function options + namespace { std::unordered_map> g_cast_table; @@ -55,6 +61,7 @@ void InitCastTable() { AddCastFunctions(GetNestedCasts()); AddCastFunctions(GetNumericCasts()); AddCastFunctions(GetTemporalCasts()); + AddCastFunctions(GetDictionaryCasts()); } void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTable); } @@ -116,14 +123,35 @@ class CastMetaFunction : public MetaFunction { } }; +static auto kCastOptionsType = GetFunctionOptionsType( + arrow::internal::DataMember("to_type", &CastOptions::to_type), + arrow::internal::DataMember("allow_int_overflow", &CastOptions::allow_int_overflow), + arrow::internal::DataMember("allow_time_truncate", &CastOptions::allow_time_truncate), + arrow::internal::DataMember("allow_time_overflow", &CastOptions::allow_time_overflow), + arrow::internal::DataMember("allow_decimal_truncate", + 
&CastOptions::allow_decimal_truncate), + arrow::internal::DataMember("allow_float_truncate", + &CastOptions::allow_float_truncate), + arrow::internal::DataMember("allow_invalid_utf8", &CastOptions::allow_invalid_utf8)); } // namespace void RegisterScalarCast(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::make_shared())); + DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType)); } - } // namespace internal +CastOptions::CastOptions(bool safe) + : FunctionOptions(internal::kCastOptionsType), + allow_int_overflow(!safe), + allow_time_truncate(!safe), + allow_time_overflow(!safe), + allow_decimal_truncate(!safe), + allow_float_truncate(!safe), + allow_invalid_utf8(!safe) {} + +constexpr char CastOptions::kTypeName[]; + CastFunction::CastFunction(std::string name, Type::type out_type_id) : ScalarFunction(std::move(name), Arity::Unary(), /*doc=*/nullptr), out_type_id_(out_type_id) {} diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index 818f2ef9182..131f57f892f 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -41,15 +41,11 @@ class ExecContext; /// \addtogroup compute-concrete-options /// @{ -struct ARROW_EXPORT CastOptions : public FunctionOptions { - explicit CastOptions(bool safe = true) - : allow_int_overflow(!safe), - allow_time_truncate(!safe), - allow_time_overflow(!safe), - allow_decimal_truncate(!safe), - allow_float_truncate(!safe), - allow_invalid_utf8(!safe) {} +class ARROW_EXPORT CastOptions : public FunctionOptions { + public: + explicit CastOptions(bool safe = true); + constexpr static char const kTypeName[] = "CastOptions"; static CastOptions Safe(std::shared_ptr to_type = NULLPTR) { CastOptions safe(true); safe.to_type = std::move(to_type); diff --git a/cpp/src/arrow/compute/cast_internal.h b/cpp/src/arrow/compute/cast_internal.h index c152d10bd86..0105d08a573 100644 --- a/cpp/src/arrow/compute/cast_internal.h +++ b/cpp/src/arrow/compute/cast_internal.h @@ -36,6 +36,7 @@ 
std::vector> GetNumericCasts(); std::vector> GetTemporalCasts(); std::vector> GetBinaryLikeCasts(); std::vector> GetNestedCasts(); +std::vector> GetDictionaryCasts(); } // namespace internal } // namespace compute diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index c3187a3995a..7d6db9f58db 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,7 @@ #include "arrow/compute/registry.h" #include "arrow/compute/util_internal.h" #include "arrow/datum.h" +#include "arrow/pretty_print.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" #include "arrow/status.h" @@ -69,6 +71,54 @@ ExecBatch::ExecBatch(const RecordBatch& batch) std::move(columns.begin(), columns.end(), values.begin()); } +bool ExecBatch::Equals(const ExecBatch& other) const { + return guarantee == other.guarantee && values == other.values; +} + +void PrintTo(const ExecBatch& batch, std::ostream* os) { + *os << "ExecBatch\n"; + + static const std::string indent = " "; + + *os << indent << "# Rows: " << batch.length << "\n"; + if (batch.guarantee != literal(true)) { + *os << indent << "Guarantee: " << batch.guarantee.ToString() << "\n"; + } + + int i = 0; + for (const Datum& value : batch.values) { + *os << indent << "" << i++ << ": "; + + if (value.is_scalar()) { + *os << "Scalar[" << value.scalar()->ToString() << "]\n"; + continue; + } + + auto array = value.make_array(); + PrettyPrintOptions options; + options.skip_new_lines = true; + *os << "Array"; + ARROW_CHECK_OK(PrettyPrint(*array, options, os)); + *os << "\n"; + } +} + +std::string ExecBatch::ToString() const { + std::stringstream ss; + PrintTo(*this, &ss); + return ss.str(); +} + +ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const { + ExecBatch out = *this; + for (auto& value : out.values) { + if (value.is_scalar()) continue; + value = value.array()->Slice(offset, length); + } + out.length 
= std::min(length, this->length - offset); + return out; +} + Result ExecBatch::Make(std::vector values) { if (values.empty()) { return Status::Invalid("Cannot infer ExecBatch length without at least one value"); @@ -77,9 +127,6 @@ Result ExecBatch::Make(std::vector values) { int64_t length = -1; for (const auto& value : values) { if (value.is_scalar()) { - if (length == -1) { - length = 1; - } continue; } @@ -94,8 +141,29 @@ Result ExecBatch::Make(std::vector values) { } } + if (length == -1) { + length = 1; + } + return ExecBatch(std::move(values), length); } + +Result> ExecBatch::ToRecordBatch( + std::shared_ptr schema, MemoryPool* pool) const { + ArrayVector columns(schema->num_fields()); + + for (size_t i = 0; i < columns.size(); ++i) { + const Datum& value = values[i]; + if (value.is_array()) { + columns[i] = value.make_array(); + continue; + } + ARROW_ASSIGN_OR_RAISE(columns[i], MakeArrayFromScalar(*value.scalar(), length, pool)); + } + + return RecordBatch::Make(std::move(schema), length, std::move(columns)); +} + namespace { Result> AllocateDataBuffer(KernelContext* ctx, int64_t length, @@ -106,7 +174,6 @@ Result> AllocateDataBuffer(KernelContext* ctx, int64_t l int64_t buffer_size = BitUtil::BytesForBits(length * bit_width); return ctx->Allocate(buffer_size); } - return Status::OK(); } struct BufferPreallocation { @@ -269,7 +336,7 @@ struct NullGeneralization { // Do not count the bits if they haven't been counted already const int64_t known_null_count = arr.null_count.load(); - if (known_null_count == 0) { + if ((known_null_count == 0) || (arr.buffers[0] == NULLPTR)) { return ALL_VALID; } @@ -616,8 +683,7 @@ class ScalarExecutor : public KernelExecutorImpl { } } - kernel_->exec(kernel_ctx_, batch, &out); - ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out)); if (!preallocate_contiguous_) { // If we are producing chunked output rather than one big array, then // emit each chunk as soon as it's available @@ 
-704,6 +770,7 @@ class ScalarExecutor : public KernelExecutorImpl { preallocate_contiguous_ = (exec_context()->preallocate_contiguous() && kernel_->can_write_into_slices && validity_preallocated_ && !is_nested(output_descr_.type->id()) && + !is_dictionary(output_descr_.type->id()) && data_preallocated_.size() == static_cast(output_num_buffers_ - 1) && std::all_of(data_preallocated_.begin(), data_preallocated_.end(), [](const BufferPreallocation& prealloc) { @@ -793,8 +860,7 @@ class VectorExecutor : public KernelExecutorImpl { output_descr_.shape == ValueDescr::ARRAY) { RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out.mutable_array())); } - kernel_->exec(kernel_ctx_, batch, &out); - ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out)); if (!kernel_->finalize) { // If there is no result finalizer (e.g. for hash-based functions, we can // emit the processed batch right away rather than waiting @@ -809,8 +875,7 @@ class VectorExecutor : public KernelExecutorImpl { if (kernel_->finalize) { // Intermediate results require post-processing after the execution is // completed (possibly involving some accumulated state) - kernel_->finalize(kernel_ctx_, &results_); - ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); + RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &results_)); for (const auto& result : results_) { RETURN_NOT_OK(listener->OnResult(result)); } @@ -863,8 +928,7 @@ class ScalarAggExecutor : public KernelExecutorImpl { } Datum out; - kernel_->finalize(kernel_ctx_, &out); - ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); + RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &out)); RETURN_NOT_OK(listener->OnResult(std::move(out))); return Status::OK(); } @@ -878,24 +942,19 @@ class ScalarAggExecutor : public KernelExecutorImpl { private: Status Consume(const ExecBatch& batch) { // FIXME(ARROW-11840) don't merge *any* aggegates for every batch - auto batch_state = kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_}); - 
ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); + ARROW_ASSIGN_OR_RAISE( + auto batch_state, + kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_})); if (batch_state == nullptr) { - kernel_ctx_->SetStatus( - Status::Invalid("ScalarAggregation requires non-null kernel state")); - return kernel_ctx_->status(); + return Status::Invalid("ScalarAggregation requires non-null kernel state"); } KernelContext batch_ctx(exec_context()); batch_ctx.SetState(batch_state.get()); - kernel_->consume(&batch_ctx, batch); - ARROW_CTX_RETURN_IF_ERROR(&batch_ctx); - - kernel_->merge(kernel_ctx_, std::move(*batch_state), state()); - ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_); - + RETURN_NOT_OK(kernel_->consume(&batch_ctx, batch)); + RETURN_NOT_OK(kernel_->merge(kernel_ctx_, std::move(*batch_state), state())); return Status::OK(); } @@ -951,8 +1010,9 @@ std::unique_ptr KernelExecutor::MakeScalarAggregate() { } // namespace detail -ExecContext::ExecContext(MemoryPool* pool, FunctionRegistry* func_registry) - : pool_(pool) { +ExecContext::ExecContext(MemoryPool* pool, ::arrow::internal::Executor* executor, + FunctionRegistry* func_registry) + : pool_(pool), executor_(executor) { this->func_registry_ = func_registry == nullptr ? 
GetFunctionRegistry() : func_registry; } diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h index 7659442d8bf..1b70ee244cb 100644 --- a/cpp/src/arrow/compute/exec.h +++ b/cpp/src/arrow/compute/exec.h @@ -28,11 +28,13 @@ #include #include "arrow/array/data.h" +#include "arrow/compute/exec/expression.h" #include "arrow/datum.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" +#include "arrow/util/type_fwd.h" #include "arrow/util/visibility.h" namespace arrow { @@ -44,7 +46,7 @@ class CpuInfo; namespace compute { -struct FunctionOptions; +class FunctionOptions; class FunctionRegistry; // It seems like 64K might be a good default chunksize to use for execution @@ -59,6 +61,7 @@ class ARROW_EXPORT ExecContext { public: // If no function registry passed, the default is used. explicit ExecContext(MemoryPool* pool = default_memory_pool(), + ::arrow::internal::Executor* executor = NULLPTR, FunctionRegistry* func_registry = NULLPTR); /// \brief The MemoryPool used for allocations, default is @@ -67,6 +70,9 @@ class ARROW_EXPORT ExecContext { ::arrow::internal::CpuInfo* cpu_info() const; + /// \brief An Executor which may be used to parallelize execution. + ::arrow::internal::Executor* executor() const { return executor_; } + /// \brief The FunctionRegistry for looking up functions by name and /// selecting kernels for execution. Defaults to the library-global function /// registry provided by GetFunctionRegistry. 
@@ -113,6 +119,7 @@ class ARROW_EXPORT ExecContext { private: MemoryPool* pool_; + ::arrow::internal::Executor* executor_; FunctionRegistry* func_registry_; int64_t exec_chunksize_ = std::numeric_limits::max(); bool preallocate_contiguous_ = true; @@ -175,6 +182,9 @@ struct ARROW_EXPORT ExecBatch { static Result Make(std::vector values); + Result> ToRecordBatch( + std::shared_ptr schema, MemoryPool* pool = default_memory_pool()) const; + /// The values representing positional arguments to be passed to a kernel's /// exec function for processing. std::vector values; @@ -186,6 +196,9 @@ struct ARROW_EXPORT ExecBatch { /// ExecBatch::length is equal to the length of this array. std::shared_ptr selection_vector; + /// A predicate Expression guaranteed to evaluate to true for all rows in this batch. + Expression guarantee = literal(true); + /// The semantic length of the ExecBatch. When the values are all scalars, /// the length should be set to 1, otherwise the length is taken from the /// array values, except when there is a selection vector. When there is a @@ -203,9 +216,13 @@ struct ARROW_EXPORT ExecBatch { return values[i]; } + bool Equals(const ExecBatch& other) const; + /// \brief A convenience for the number of values / arguments. int num_values() const { return static_cast(values.size()); } + ExecBatch Slice(int64_t offset, int64_t length) const; + /// \brief A convenience for returning the ValueDescr objects (types and /// shapes) from the batch. 
std::vector GetDescriptors() const { @@ -215,8 +232,15 @@ struct ARROW_EXPORT ExecBatch { } return result; } + + std::string ToString() const; + + ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*); }; +inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals(r); } +inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); } + /// \defgroup compute-call-function One-shot calls to compute functions /// /// @{ diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt new file mode 100644 index 00000000000..2ed8b1c9480 --- /dev/null +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +arrow_install_all_headers("arrow/compute/exec") + +add_arrow_compute_test(expression_test + PREFIX + "arrow-compute" + SOURCES + expression_test.cc + subtree_test.cc) + +add_arrow_compute_test(plan_test PREFIX "arrow-compute") + +add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/exec/doc/exec_node.md b/cpp/src/arrow/compute/exec/doc/exec_node.md new file mode 100644 index 00000000000..797cc87d90a --- /dev/null +++ b/cpp/src/arrow/compute/exec/doc/exec_node.md @@ -0,0 +1,147 @@ + + +# ExecNodes and logical operators + +`ExecNode`s are intended to implement individual logical operators +in a streaming execution graph. Each node receives batches from +upstream nodes (inputs), processes them in some way, then pushes +results to downstream nodes (outputs). `ExecNode`s are owned and +(to an extent) coordinated by an `ExecPlan`. + +> Terminology: "operator" and "node" are mostly interchangeable, like +> "Interface" and "Abstract Base Class" in c++ space. The latter is +> a formal and specific bit of code which implements the abstract +> concept. + +## Types of logical operators + +Each of these will have at least one corresponding concrete +`ExecNode`. Where possible, compatible implementations of a +logical operator will *not* be exposed as independent subclasses +of `ExecNode`. Instead we prefer that they +be encapsulated internally by a single subclass of `ExecNode` +to permit switching between them during a query. + +- Scan: materializes in-memory batches from storage (e.g. Parquet + files, flight stream, ...) +- Filter: evaluates an `Expression` on each input batch and outputs + a copy with any rows excluded for which the filter did not return + `true`. +- Project: evaluates `Expression`s on each input batch to produce + the columns of an output batch. +- Grouped Aggregate: identify groups based on one or more key columns + in each input batch, then update aggregates corresponding to those + groups.
Note that this is a pipeline breaker; it will wait for its + inputs to complete before outputting any batches. +- Union: merge two or more streams of batches into a single stream + of batches. +- Write: write each batch to storage +- ToTable: Collect batches into a `Table` with stable row ordering where + possible. + +#### Not in scope for Arrow 5.0: + +- Join: perform an inner, left, outer, semi, or anti join given some + join predicates. +- Sort: accumulate all input batches into a single table, reorder its + rows by some sorting condition, then stream the sorted table out as + batches +- Top-K: retrieve a limited subset of rows from a table as though it + were in sorted order. + +For example: a dataset scan with only a filter and a +projection will correspond to a fairly trivial graph: + +``` +ScanNode -> FilterNode -> ProjectNode -> ToTableNode +``` + +A scan node loads batches from disk and pushes to a filter node. +The filter node excludes some rows based on an `Expression` then +pushes filtered batches to a project node. The project node +materializes new columns based on `Expression`s then pushes those +batches to a table collection node. The table collection node +assembles these batches into a `Table` which is handed off as the +result of the `ExecPlan`. + +## Parallelism, pipelines + +The execution graph is orthogonal to parallelism; any +node may push to any other node from any thread. A scan node causes +each batch to arrive on a thread after which it will pass through +each node in the example graph above, never leaving that thread +(memory/other resource pressure permitting). + +The example graph above happens to be simple enough that processing +of any batch by any node is independent of other nodes and other +batches; it is a pipeline. Note that there is no explicit `Pipeline` +class- pipelined execution is an emergent property of some sub +graphs.
+ +Nodes which do not share this property (pipeline breakers) are +responsible for deciding when they have received sufficient input, +when they can start emitting output, etc. For example a `GroupByNode` +will wait for its input to be exhausted before it begins pushing +batches to its own outputs. + +Parallelism is "seeded" by `ScanNode` (or other source nodes)- it +owns a reference to the thread pool on which the graph is executing +and fans out pushing to its outputs across that pool. A subsequent +`ProjectNode` will process the batch immediately after it is handed +off by the `ScanNode`- no explicit scheduling required. +Eventually, individual nodes may internally +parallelize processing of individual batches (for example, if a +`FilterNode`'s filter expression is slow). This decision is also left +up to each `ExecNode` implementation. + +# ExecNode interface and usage + +`ExecNode`s are constructed using one of the available factory +functions, such as `arrow::compute::MakeFilterNode` +or `arrow::dataset::MakeScanNode`. Any inputs to an `ExecNode` +must be provided when the node is constructed, so the first +nodes to be constructed are source nodes with no inputs +such as `ScanNode`. + +The batches yielded by an `ExecNode` always conform precisely +to its output schema. NB: no by-name field lookups or type +checks are performed during execution. The output schema +is usually derived from the output schemas of inputs. For +example a `FilterNode`'s output schema is always identical to +that of its input since batches are only modified by exclusion +of some rows. + +An `ExecNode` will begin producing batches when +`node->StartProducing()` is invoked and will proceed until stopped +with `node->StopProducing()`. Started nodes may not be destroyed +until stopped. `ExecNode`s are not currently restartable. +An `ExecNode` pushes batches to its outputs by passing each batch +to `output->InputReceived()`. It signals exhaustion by invoking +`output->InputFinished()`. 
+ +Error recovery is permitted within a node. For example, if evaluation +of an `Expression` runs out of memory the governing node may +try that evaluation again after some memory has been freed up. +If a node experiences an error from which it cannot recover (for +example an IO error while parsing a CSV file) then it reports this +with `output->ErrorReceived()`. An error which escapes the scope of +a single node should not be considered recoverable (no `FilterNode` +should `try/catch` the IO error above). + diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg new file mode 100644 index 00000000000..814ad8a69f6 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg new file mode 100644 index 00000000000..7a75c96dfc5 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg new file mode 100644 index 00000000000..59bcc167ed2 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg new file mode 100644 index 00000000000..4484c57a81d Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg new file mode 100644 index 00000000000..afd33aba2e0 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg new file mode 100644 index 00000000000..f026aebe9a2 Binary files /dev/null and 
b/cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg new file mode 100644 index 00000000000..8e1981b6571 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg new file mode 100644 index 00000000000..e976a461459 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg new file mode 100644 index 00000000000..7552d5af6af Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg new file mode 100644 index 00000000000..242f1305328 Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg new file mode 100644 index 00000000000..4c064595c9a Binary files /dev/null and b/cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg differ diff --git a/cpp/src/arrow/compute/exec/doc/key_map.md b/cpp/src/arrow/compute/exec/doc/key_map.md new file mode 100644 index 00000000000..fdedc88c4d4 --- /dev/null +++ b/cpp/src/arrow/compute/exec/doc/key_map.md @@ -0,0 +1,223 @@ + + +# Swiss Table + +A specialized hash table implementation used to dynamically map combinations of key field values to a dense set of integer ids. Ids can later be used in place of keys to identify groups of rows with equal keys. + +## Introduction + +Hash group-by in Arrow uses a variant of a hash table based on a data structure called Swiss table. Swiss table uses linear probing. 
There is an array of slots and the information related to inserted keys is stored in these slots. A hash function determines the slot where the search for a matching key will start during hash table lookup. Then the slots are visited sequentially, wrapping around the end of an array, until either a match or an empty slot is found, the latter case meaning that there is no match. Swiss table organizes the slots in blocks of 8 and has a design that enables data level parallelism at the block level. More precisely, it allows for visiting all slots within a block at once during lookups, by simply using 64-bit arithmetic. SIMD instructions can further enhance this data level parallelism allowing to process multiple blocks related to multiple input keys together using SIMD vectors of 64-bit elements. Occupied slots within a block are always clustered together. The name Swiss table comes from likening resulting sequences of empty slots to holes in a one dimensional cheese. + +## Interface + +Hash table used in query processing for implementing join and group-by operators does not need to provide all of the operations that a general purpose hash table would. Simplified requirements can help achieve a simpler and more efficient design. For instance we do not need to be able to remove previously inserted keys. It’s an append-only data structure: new keys can be added but old keys are never erased. Also, only a single copy of each key can be inserted - it is like `std::map` in that sense and not `std::multimap`. + +Our Swiss table is fully vectorized. That means that all methods work on vectors of input keys processing them in batches. Specialized SIMD implementations of processing functions are almost always provided for performance critical operations. All callback interfaces used from the core hash table code are also designed to work on batches of inputs instead of individual keys. The batch size can be almost arbitrary and is selected by the client of the hash table. 
Batch size should be the smallest number of input items, big enough so that the benefits of vectorization and SIMD can be fully experienced. Keeping it small means less memory used for temporary arrays storing intermediate results of computation (vector equivalent of some temporary variables kept on the stack). That in turn means smaller space in CPU caches, which also means less impact on other memory access intensive operations. We pick 1024 as the default size of the batch. We will call it a **mini-batch** to distinguish it from potentially other forms of batches used at higher levels in the code, e.g. when scheduling work for worker threads or relational operators inside an analytic query. + +The main functionality provided by Swiss table is mapping of arbitrarily complex keys to unique integer ids. Let us call it **lookup-or-insert**. Given a sequence of key values, return a corresponding sequence of integer ids, such that all keys that are equal receive the same id and for K distinct keys the integer ids will be assigned from the set of numbers 0 to (K-1). If we find a matching key in a hash table for a given input, we return the **key id** assigned when the key was first inserted into a hash table. If we fail to find an already inserted match, we assign the first unused integer as a key id and add a new entry to a hash table. Due to vectorized processing, which may result in out-of-order processing of individual inputs, it is not guaranteed that if there are two new key values in the same input batch and one of them appears earlier in the input sequence, then it will receive a smaller key id. Additional mapping functionality can be built on top of basic mapping to integer key id, for instance if we want to assign and perhaps keep updating some values to all unique keys, we can keep these values in a resizable vector indexed by obtained key id. + +The implementation of Swiss table does not need to have any information related to the domain of the keys. 
It does not use their logical data type or information about their physical representation and does not even use pointers to keys. All access to keys is delegated to a separate class or classes that provide callback functions for three operations: +- computing hashes of keys; +- checking equality for given pairs of keys; +- appending a given sequence of keys to a stack maintained outside of Swiss table object, so that they can be referenced later on by key ids (key ids will be equal to their positions in the stack). + + +When passing arguments to callback functions the keys are referenced using integer ids. For the left side - that is the keys present in the input mini-batch - ordinal positions within that mini-batch are used. For the right side - that is the keys inserted into the hash table - these are identified by key ids assigned to them and stored inside Swiss table when they were first encountered and processed. + +Diagram with logical view of information passing in callbacks: + +![alt text](img/key_map_1.jpg) + +Hash table values for inserted keys are also stored inside Swiss table. Because of that, hash table logic does not need to ever re-evaluate the hash, and there is actually no need for a hash function callback. It is enough that the caller provides hash values for all entries in the batch when calling lookup-or-insert. + +## Basic architecture and organization of data +The hash table is an array of **slots**. Slots are grouped in groups of 8 called **blocks**. The number of blocks is a power of 2. The empty hash table starts with a single block, with all slots empty. Then, as the keys are getting inserted and the amount of empty slots is shrinking, at some point resizing of the hash table is triggered. The data stored in slots is moved to a new hash table that has the double of the number of blocks. 
+ +The diagram below shows the basic organization of data in our implementation of Swiss table: + +![alt text](img/key_map_2.jpg) + +N is the log of the number of blocks, 2^(N+3) is the number of slots and also the maximum number of inserted keys and hence (N + 3) is the number of bits required to store a key id. We will refer to N as the **size of the hash table**. + +Index of a block within an array will be called **block id**, and similarly index of a slot will be **slot id**. Sometimes we will focus on a single block and refer to slots that belong to it by using a **local slot id**, which is an index from 0 to 7. + +Every slot can either be **empty** or store data related to a single inserted key. There are three pieces of information stored inside a slot: +- status byte, +- key id, +- key hash. + +Status byte, as the name suggests, stores 8 bits. The highest bit indicates if the slot is empty (the highest bit is set) or corresponds to one of inserted keys (the highest bit is zero). The remaining 7 bits contain 7 bits of key hash that we call a **stamp**. The stamp is used to eliminate some false positives when searching for a matching key for a given input. Slot also stores **key id**, which is a non-negative integer smaller than the number of inserted keys, that is used as a reference to the actual inserted key. The last piece of information related to an inserted key is its **hash** value. We store hashes for all keys, so that they never need to be re-computed. That greatly simplifies some operations, like resizing of a hash table, that may not even need to look at the keys at all. For an empty slot, the status byte is 0x80, key id is zero and the hash is not used and can be set to any number. + +A single block contains 8 slots and can be viewed as a micro-stack of up to 8 inserted keys. When the first key is inserted into an empty block, it will occupy a slot with local id 0. The second inserted key will go into slot number 1 and so on.
We use N highest bits of hash to get an index of a **start block**, when searching for a match or an empty slot to insert a previously not seen key when that is the case. If the start block contains any empty slots, then the search for either a match or place to insert a key will end at that block. We will call such a block an **open block**. A block that is not open is a full block. In the case of a full block, the input key related search may continue in the next block modulo the number of blocks. If the key is not inserted into its start block, we will refer to it as an **overflow** entry, other entries being **non-overflow**. Overflow entries are slower to process, since they require visiting more than one block, so we want to keep their percentage low. This is done by choosing the right **load factor** (percentage of occupied slots in the hash table) at which the hash table gets resized and the number of blocks gets doubled. By tuning this value we can control the probability of encountering an overflow entry. + +The most interesting part of each block is the set of status bytes of its slots, which is simply a single 64-bit word. The implementation of efficient searches across these bytes during lookups requires using either leading zero count or trailing zero count intrinsic. Since there are cases when only the first one is available, in order to take advantage of it, we order the bytes in the 64-bit status word so that the first slot within a block uses the highest byte and the last one uses the lowest byte (slots are in reversed bytes order). The diagram below shows how the information about slots is stored within a 64-bit status word: + +![alt text](img/key_map_3.jpg) + +Each status byte has a 7-bit fragment of hash value - a **stamp** - and an empty slot bit. Empty slots have status byte equal to 0x80 - the highest bit is set to 1 to indicate an empty slot and the lowest bits, which are used by a stamp, are set to zero.
+ +The diagram below shows which bits of hash value are used by hash table: + +![alt text](img/key_map_4.jpg) + +If a hash table has 2N blocks, then we use N highest bits of a hash to select a start block when searching for a match. The next 7 bits are used as a stamp. Using the highest bits to pick a start block means that a range of hash values can be easily mapped to a range of block ids of start blocks for hashes in that range. This is useful when resizing a hash table or merging two hash tables together. + +### Interleaving status bytes and key ids + +Status bytes and key ids for all slots are stored in a single array of bytes. They are first grouped by 8 into blocks, then each block of status bytes is interleaved with a corresponding block of key ids. Finally key ids are represented using the smallest possible number of bits and bit-packed (bits representing each next key id start right after the last bit of the previous key id). Note that regardless of the chosen number of bits, a block of bit-packed key ids (that is 8 of them) will start and end on the byte boundary. + +The diagram below shows the organization of bytes and bits of a single block in interleaved array: +![alt text](img/key_map_5.jpg) + +From the size of the hash table we can derive the number K of bits needed in the worst case to encode any key id. K is equal to the number of bits needed to represent slot id (number of keys is not greater than the number of slots and any key id is strictly less than the number of keys), which for a hash table of size N (N blocks) equals (N+3). To simplify bit packing and unpacking and avoid handling of special cases, we will round up K to full bytes for K > 24 bits. + +Status bytes are stored in a single 64-bit word in reverse byte order (the last byte corresponds to the slot with local id 0). On the other hand key ids are stored in the normal order (the order of slot ids). 
+ +Since both status byte and key id for a given slot are stored in the same array close to each other, we can expect that most of the lookups will read only one CPU cache-line from memory inside Swiss table code (then at least another one outside Swiss table to access the bytes of the key for the purpose of comparison). Even if we hit an overflow entry, it is still likely to reside on the same cache-line as the start block data. Hash values, which are stored separately from status byte and key id, are only used when resizing and do not impact the lookups outside these events. + +> Improvement to consider: +> In addition to the Swiss table data, we need to store an array of inserted keys, one for each key id. If keys are of fixed length, then the address of the bytes of the key can be calculated by multiplying key id by the common length of the key. If keys are of varying length, then there will be an additional array with an offset of each key within the array of concatenated bytes of keys. That means that any key comparison during lookup will involve 3 arrays: one to get key id, one to get key offset and final one with bytes of the key. This could be reduced to 2 array lookups if we stored key offset instead of key id interleaved with slot status bytes. Offset indexed by key id and stored in its own array becomes offset indexed by slot id and stored interleaved with slot status bytes. At the same time key id indexed by slot id and interleaved with slot status bytes before becomes key id referenced using offset and stored with key bytes. There may be a slight increase in the total size of memory needed by the hash table, equal to the difference in the number of bits used to store offset and those used to store key id, multiplied by the number of slots, but that should be a small fraction of the total size. + +### 32-bit hash vs 64-bit hash + +Currently we use 32-bit hash values in Swiss table code and 32-bit integers as key ids. 
For the robust implementation, sooner or later we will need to support 64-bit hash and 64-bit key ids. When we use 32-bit hash, it means that we run out of hash bits when hash table size N is greater than 25 (25 bits of hash needed to select a block and 7 bits needed to generate a stamp byte reach 32 total bits). When the number of inserted keys exceeds the maximal number of keys stored in a hash table of size 25 (which is at least 224), the chance of false positives during lookups will start quickly growing. 32-bit hash should not be used with more than about 16 million inserted keys. + +### Low memory footprint and low chance of hash collisions + +Swiss table is a good choice of a hash table for modern hardware, because it combines lookups that can take advantage of special CPU instructions with space efficiency and low chance of hash collisions. + +Space efficiency is important for performance, because the cost of random array accesses, often dominating the lookup cost for larger hash tables, increases with the size of the arrays. This happens due to limited space of CPU caches. Let us look at what is the amortized additional storage cost for a key in a hash table apart from the essential cost of storing data of all those keys. Furthermore, we can skip the storage of hash values, since these are only used during infrequent hash table resize operations (should not have a big impact on CPU cache usage in normal cases). + +Half full hash table of size N will use 2 status bytes per inserted key (because for every filled slot there is one empty slot) and 2\*(N+3) bits for key id (again, one for the occupied slot and one for the empty). For N = 16 for instance this is slightly under 7 bytes per inserted key. + +Swiss table also has a low probability of false positives leading to wasted key comparisons. Here is some rationale behind why this should be the case. Hash table of size N can contain up to 2N+3 keys. 
Search for a match involves (N + 7) hash bits: N to select a start block and 7 to use as a stamp. There are always at least 16 times more combinations of used hash bits than there are keys in the hash table (32 times more if the hash table is half full). These numbers mean that the probability of false positives resulting from a search for a matching slot should be low. That corresponds to an expected number of comparisons per lookup being close to 1 for keys already present and 0 for new keys. + +## Lookup + +Lookup-or-insert operation, given a hash of a key, finds a list of candidate slots with corresponding keys that are likely to be equal to the input key. The list may be empty, which means that the key does not exist yet in the hash table. If it is not empty, then the callback function for key comparison is called for each next candidate to verify that there is indeed a match. False positives get rejected and we end up either finding an actual match or an empty slot, which means that the key is new to the hash table. New keys get assigned next available integers as key ids, and are appended to the set of keys stored in the hash table. As a result of inserting new keys to the hash table, the density of occupied slots may reach an upper limit, at which point the hash table will be resized and will afterwards have twice as many slots. That is in summary lookup-or-insert functionality, but the actual implementation is a bit more involved, because of vectorization of the processing and various optimizations for common cases. + +### Search within a single block + +There are three possible cases that can occur when searching for a match for a given key (that is, for a given stamp of a key) within a single block, illustrated below. + + 1. There is a matching stamp in the block of status bytes: + +![alt text](img/key_map_6.jpg) + + 2. There is no matching stamp in the block, but there is an empty slot in the block: + +![alt text](img/key_map_7.jpg) + + 3. 
There is no matching stamp in the block and the block is full (there are no empty slots left): + +![alt text](img/key_map_8.jpg) + +64-bit arithmetic can be used to search for a matching slot within the entire single block at once, without iterating over all slots in it. Following is an example of a sequence of steps to find the first status byte for a given stamp, returning the first empty slot on miss if the block is not full or 8 (one past maximum local slot id) otherwise. + +Following is a sketch of the possible steps to execute when searching for the matching stamp in a single block. + +*Example will use input stamp 0x5E and a 64-bit status bytes word with one empty slot: +0x 4B17 5E3A 5E2B 1180*. + +1. [1 instruction] Replicate stamp to all bytes by multiplying it by 0x 0101 0101 0101 0101. + + *We obtain: 0x 5E5E 5E5E 5E5E 5E5E.* + +2. [1 instruction] XOR replicated stamp with status bytes word. Bytes corresponding to a matching stamp will be 0, bytes corresponding to empty slots will have a value between 128 and 255, bytes corresponding to non-matching non-empty slots will have a value between 1 and 127. + + *We obtain: 0x 1549 0064 0075 4FDE.* + +3. [2 instructions] In the next step we want to have information about a match in the highest bit of each byte. We can ignore here empty slot bytes, because they will be taken care of at a later step. Set the highest bit in each byte (OR with 0x 8080 8080 8080 8080) and then subtract 1 from each byte (subtract 0x 0101 0101 0101 0101 from 64-bit word). Now if a byte corresponds to a non-empty slot then the highest bit 0 indicates a match and 1 indicates a miss. + + *We obtain: 0x 95C9 80E4 80F5 CFDE, + then 0x 94C8 7FE3 7FF4 CEDD.* + +4. [3 instructions] In the next step we want to obtain in each byte one of two values: 0x80 if it is either an empty slot or a match, 0x00 otherwise. 
We do it in three steps: NOT the result of the previous step to change the meaning of the highest bit; OR with the original status word to set highest bit in a byte to 1 for empty slots; mask out everything other than the highest bits in all bytes (AND with 0x 8080 8080 8080 8080). + + *We obtain: 0x 6B37 801C 800B 3122, + then 0x 6B37 DE3E DE2B 31A2, + finally 0x 0000 8000 8000 0080.* + +5. [2 instructions] Finally, use leading zero bits count and divide it by 8 to find an index of the first byte that corresponds either to a match or an empty slot. If the leading zero count intrinsic returns 64 for a 64-bit input zero, then after dividing by 8 we will also get the desired answer in case of a full block without any matches. + + *We obtain: 16, + then 2 (index of the first slot within the block that matches the stamp).* + +If SIMD instructions with 64-bit lanes are available, multiple single block searches for different keys can be executed together. For instance AVX2 instruction set allows to process quadruplets of 64-bit values in a single instruction, four searches at once. + +### Complete search potentially across multiple blocks + +Full implementation of a search for a matching key may involve visiting multiple blocks beginning with the start block selected based on the hash of the key. We move to the next block modulo the number of blocks, whenever we do not find a match in the current block and the current block is full. The search may also involve visiting one or more slots in each block. Visiting in this case means calling a comparison callback to verify the match whenever a slot with a matching stamp is encountered. Eventually the search stops when either: +- the matching key is found in one of the slots matching the stamp, or + +- an empty slot is reached. 
This is illustrated in the diagram below: +![alt text](img/key_map_9.jpg) + + +### Optimistic processing with two passes + +Hash table lookups may have high cost in the pessimistic case, when we encounter cases of hash collisions and full blocks that lead to visiting further blocks. In the majority of cases we can expect an optimistic situation - the start block is not full, so we will only visit this one block, and all stamps in the block are different, so we will need at most one comparison to find a match. We can expect about 90% of the key lookups for an existing key to go through the optimistic path of processing. For that reason it pays off to optimize especially for this 90% of inputs. + +Lookups in Swiss table are split into two passes over an input batch of keys. The **first pass: fast-path lookup** , is a highly optimized, vectorized, SIMD-friendly, branch-free code that fully handles optimistic cases. The **second pass: slow-path lookup** , is normally executed only for the selection of inputs that have not been finished in the first pass, although it can also be called directly on all of the inputs, skipping fast-path lookup. It handles all special cases and inserts but in order to be robust it is not as efficient as fast-path. Slow-path lookup does not need to repeat the work done in fast-path lookup - it can use the state reached at the end of fast-path lookup as a starting point. + +Fast-path lookup implements search only for the first stamp match and only within the start block. It only makes sense when we already have at least one key inserted into the hash table, since it does not handle inserts. It takes a vector of key hashes as an input and based on it outputs three pieces of information for each key: + +- Key id corresponding to the slot in which a matching stamp was found. Any valid key id if a matching stamp was not found. +- A flag indicating if a match was found or not. 
+- Slot id of a slot from which slow-path should pick up the search if the first match was either not found or it turns out to be false positive after evaluating key comparison. + +> Improvement to consider: +> precomputing 1st pass lookup results. +> +> If the hash table is small, the number of inserted keys is small, we could further simplify and speed up the first pass by storing in a lookup table pre-computed results for all combinations of hash bits. Let us consider the case of Swiss table of size 5 that has 256 slots and up to 128 inserted keys. Only 12 bits of hash are used by lookup in that case: 5 to select a block, 7 to create a stamp. For all 2^12 combinations of those bits we could keep the result of first pass lookup in an array. Key id and a match indicating flag can use one byte: 7 bits for key id and 1 bit for the flag. Note that slot id is only needed if we go into 2nd pass lookup, so it can be stored separately and likely only accessed by a small subset of keys. Fast-path lookup becomes almost a single fetch of result from a 4KB array. Lookup arrays used to implement this need to be kept in sync with the main copy of data about slots, which requires extra care during inserts. Since the number of entries in lookup arrays is much higher than the number of slots, this technique only makes sense for small hash tables. + +### Dense comparisons + +If there is at least one key inserted into a hash table, then every slot contains a key id value that corresponds to some actual key that can be used in comparison. That is because empty slots are initialized with 0 as their key id. After the fast-path lookup we get a match-found flag for each input. If it is set, then we need to run a comparison of the input key with the key in the hash table identified by key id returned by fast-path code. The comparison will verify that there is a true match between the keys. 
We only need to do this for a subset of inputs that have a match candidate, but since we have key id values corresponding to some real key for all inputs, we may as well execute comparisons on all inputs unconditionally. If the majority (e.g. more than 80%) of the keys have a match candidate, the cost of evaluating comparison for the remaining fraction of keys but without filtering may actually be cheaper than the cost of running evaluation only for required keys while referencing filter information. This can be seen as a variant of general preconditioning techniques used to avoid diverging conditional branches in the code. It may be used, based on some heuristic, to verify matches reported by fast-path lookups and is referred to as **dense comparisons**. + +## Resizing + +New hash table is initialized as empty and has only a single block with a space for only a few key entries. Doubling of the hash table size becomes necessary as more keys get inserted. It is invoked during the 2nd pass of the lookups, which also handles inserts. It happens immediately after the number of inserted keys reaches a specific upper limit decided based on a current size of the hash table. There may still be unprocessed entries from the input mini-batch after resizing, so the 2nd pass of the lookup is restarted right after, with the bigger hash table and the remaining subset of unprocessed entries. + +Current policy, that should work reasonably well, is to resize a small hash table (up to 8KB) when it is 50% full. Larger hash tables are resized when 75% full. We want to keep size in memory as small as possible, while maintaining a low probability of blocks becoming full. + +When discussing resizing we will be talking about **resize source** and **resize target** tables. The diagram below shows how the same hash bits are interpreted differently by the source and the target. 
+ +![alt text](img/key_map_10.jpg) + +For a given hash, if a start block id was L in the source table, it will be either (2\*L+0) or (2\*L+1) in the target table. Based on that we can expect data access locality when migrating the data between the tables. + +Resizing is cheap also thanks to the fact that hash values for keys in the hash table are kept together with other slot data and do not need to be recomputed. That means that resizing procedure does not ever need to access the actual bytes of the key. + +### 1st pass + +Based on the hash value for a given slot we can tell whether this slot contains an overflow or non-overflow entry. In the first pass we go over all source slots in sequence, filter out overflow entries and move to the target table all other entries. Non-overflow entries from a block L will be distributed between blocks (2\*L+0) and (2\*L+1) of the target table. None of these target blocks can overflow, since they will be accommodating at most 8 input entries during this pass. + +For every non-overflow entry, the highest bit of a stamp in the source slot decides whether it will go to the left or to the right target block. It is further possible to avoid any conditional branches in this partitioning code, so that the result is friendly to the CPU execution pipeline. + +![alt text](img/key_map_11.jpg) + + +### 2nd pass + +In the second pass of resizing, we scan all source slots again, this time focusing only on the overflow entries that were all skipped in the 1st pass. We simply reinsert them in the target table using generic insertion code with one exception. Since we know that all the source keys are different, there is no need to search for a matching stamp or run key comparisons (or look at the key values). We just need to find the first open block beginning with the start block in the target table and use its first empty slot as the insert destination. 
+ +We expect overflow entries to be rare and therefore the relative cost of that pass should stay low. + diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc new file mode 100644 index 00000000000..4a4758c8471 --- /dev/null +++ b/cpp/src/arrow/compute/exec/exec_plan.cc @@ -0,0 +1,1312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/exec/exec_plan.h" + +#include +#include +#include +#include + +#include "arrow/array/concatenate.h" +#include "arrow/array/util.h" +#include "arrow/compute/api_vector.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/exec/expression.h" +#include "arrow/compute/exec_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/datum.h" +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "arrow/util/async_generator.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" +#include "arrow/util/optional.h" +#include "arrow/util/task_group.h" +#include "arrow/util/thread_pool.h" +#include "arrow/util/unreachable.h" +#include "arrow/util/vector.h" + +namespace arrow { + +using BitUtil::CountLeadingZeros; +using internal::checked_cast; +using internal::checked_pointer_cast; + +namespace compute { + +namespace { + +struct ExecPlanImpl : public ExecPlan { + explicit ExecPlanImpl(ExecContext* exec_context) : ExecPlan(exec_context) {} + + ~ExecPlanImpl() override { + if (started_ && !finished_.is_finished()) { + ARROW_LOG(WARNING) << "Plan was destroyed before finishing"; + StopProducing(); + finished().Wait(); + } + } + + ExecNode* AddNode(std::unique_ptr node) { + if (node->num_inputs() == 0) { + sources_.push_back(node.get()); + } + if (node->num_outputs() == 0) { + sinks_.push_back(node.get()); + } + nodes_.push_back(std::move(node)); + return nodes_.back().get(); + } + + Status Validate() const { + if (nodes_.empty()) { + return Status::Invalid("ExecPlan has no node"); + } + for (const auto& node : nodes_) { + RETURN_NOT_OK(node->Validate()); + } + return Status::OK(); + } + + Status StartProducing() { + if (started_) { + return Status::Invalid("restarted ExecPlan"); + } + started_ = true; + + // producers precede consumers + sorted_nodes_ = TopoSort(); + + std::vector> futures; + + Status st = Status::OK(); + + using rev_it = std::reverse_iterator; + for 
(rev_it it(sorted_nodes_.end()), end(sorted_nodes_.begin()); it != end; ++it) { + auto node = *it; + + st = node->StartProducing(); + if (!st.ok()) { + // Stop nodes that successfully started, in reverse order + stopped_ = true; + StopProducingImpl(it.base(), sorted_nodes_.end()); + break; + } + + futures.push_back(node->finished()); + } + + finished_ = AllComplete(std::move(futures)); + return st; + } + + void StopProducing() { + DCHECK(started_) << "stopped an ExecPlan which never started"; + stopped_ = true; + + StopProducingImpl(sorted_nodes_.begin(), sorted_nodes_.end()); + } + + template + void StopProducingImpl(It begin, It end) { + for (auto it = begin; it != end; ++it) { + auto node = *it; + node->StopProducing(); + } + } + + NodeVector TopoSort() { + struct Impl { + const std::vector>& nodes; + std::unordered_set visited; + NodeVector sorted; + + explicit Impl(const std::vector>& nodes) : nodes(nodes) { + visited.reserve(nodes.size()); + sorted.resize(nodes.size()); + + for (const auto& node : nodes) { + Visit(node.get()); + } + + DCHECK_EQ(visited.size(), nodes.size()); + } + + void Visit(ExecNode* node) { + if (visited.count(node) != 0) return; + + for (auto input : node->inputs()) { + // Ensure that producers are inserted before this consumer + Visit(input); + } + + sorted[visited.size()] = node; + visited.insert(node); + } + }; + + return std::move(Impl{nodes_}.sorted); + } + + Future<> finished_ = Future<>::MakeFinished(); + bool started_ = false, stopped_ = false; + std::vector> nodes_; + NodeVector sources_, sinks_; + NodeVector sorted_nodes_; +}; + +ExecPlanImpl* ToDerived(ExecPlan* ptr) { return checked_cast(ptr); } + +const ExecPlanImpl* ToDerived(const ExecPlan* ptr) { + return checked_cast(ptr); +} + +util::optional GetNodeIndex(const std::vector& nodes, + const ExecNode* node) { + for (int i = 0; i < static_cast(nodes.size()); ++i) { + if (nodes[i] == node) return i; + } + return util::nullopt; +} + +} // namespace + +Result> 
ExecPlan::Make(ExecContext* ctx) { + return std::shared_ptr(new ExecPlanImpl{ctx}); +} + +ExecNode* ExecPlan::AddNode(std::unique_ptr node) { + return ToDerived(this)->AddNode(std::move(node)); +} + +const ExecPlan::NodeVector& ExecPlan::sources() const { + return ToDerived(this)->sources_; +} + +const ExecPlan::NodeVector& ExecPlan::sinks() const { return ToDerived(this)->sinks_; } + +Status ExecPlan::Validate() { return ToDerived(this)->Validate(); } + +Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); } + +void ExecPlan::StopProducing() { ToDerived(this)->StopProducing(); } + +Future<> ExecPlan::finished() { return ToDerived(this)->finished_; } + +ExecNode::ExecNode(ExecPlan* plan, std::string label, NodeVector inputs, + std::vector input_labels, + std::shared_ptr output_schema, int num_outputs) + : plan_(plan), + label_(std::move(label)), + inputs_(std::move(inputs)), + input_labels_(std::move(input_labels)), + output_schema_(std::move(output_schema)), + num_outputs_(num_outputs) { + for (auto input : inputs_) { + input->outputs_.push_back(this); + } +} + +Status ExecNode::Validate() const { + if (inputs_.size() != input_labels_.size()) { + return Status::Invalid("Invalid number of inputs for '", label(), "' (expected ", + num_inputs(), ", actual ", input_labels_.size(), ")"); + } + + if (static_cast(outputs_.size()) != num_outputs_) { + return Status::Invalid("Invalid number of outputs for '", label(), "' (expected ", + num_outputs(), ", actual ", outputs_.size(), ")"); + } + + for (auto out : outputs_) { + auto input_index = GetNodeIndex(out->inputs(), this); + if (!input_index) { + return Status::Invalid("Node '", label(), "' outputs to node '", out->label(), + "' but is not listed as an input."); + } + } + + return Status::OK(); +} + +bool ExecNode::ErrorIfNotOk(Status status) { + if (status.ok()) return false; + + for (auto out : outputs_) { + out->ErrorReceived(this, out == outputs_.back() ? 
std::move(status) : status); + } + return true; +} + +struct SourceNode : ExecNode { + SourceNode(ExecPlan* plan, std::string label, std::shared_ptr output_schema, + AsyncGenerator> generator) + : ExecNode(plan, std::move(label), {}, {}, std::move(output_schema), + /*num_outputs=*/1), + generator_(std::move(generator)) {} + + const char* kind_name() override { return "SourceNode"; } + + [[noreturn]] static void NoInputs() { + Unreachable("no inputs; this should never be called"); + } + [[noreturn]] void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); } + [[noreturn]] void ErrorReceived(ExecNode*, Status) override { NoInputs(); } + [[noreturn]] void InputFinished(ExecNode*, int) override { NoInputs(); } + + Status StartProducing() override { + DCHECK(!stop_requested_) << "Restarted SourceNode"; + + CallbackOptions options; + if (auto executor = plan()->exec_context()->executor()) { + // These options will transfer execution to the desired Executor if necessary. + // This can happen for in-memory scans where batches didn't require + // any CPU work to decode. Otherwise, parsing etc should have already + // been placed us on the desired Executor and no queues will be pushed to. 
+ options.executor = executor; + options.should_schedule = ShouldSchedule::IfDifferentExecutor; + } + + finished_ = Loop([this, options] { + std::unique_lock lock(mutex_); + int seq = batch_count_++; + if (stop_requested_) { + return Future>::MakeFinished(Break(seq)); + } + lock.unlock(); + + return generator_().Then( + [=](const util::optional& batch) -> ControlFlow { + std::unique_lock lock(mutex_); + if (IsIterationEnd(batch) || stop_requested_) { + stop_requested_ = true; + return Break(seq); + } + lock.unlock(); + + outputs_[0]->InputReceived(this, seq, *batch); + return Continue(); + }, + [=](const Status& error) -> ControlFlow { + // NB: ErrorReceived is independent of InputFinished, but + // ErrorReceived will usually prompt StopProducing which will + // prompt InputFinished. ErrorReceived may still be called from a + // node which was requested to stop (indeed, the request to stop + // may prompt an error). + std::unique_lock lock(mutex_); + stop_requested_ = true; + lock.unlock(); + outputs_[0]->ErrorReceived(this, error); + return Break(seq); + }, + options); + }).Then([&](int seq) { outputs_[0]->InputFinished(this, seq); }); + + return Status::OK(); + } + + void PauseProducing(ExecNode* output) override {} + + void ResumeProducing(ExecNode* output) override {} + + void StopProducing(ExecNode* output) override { + DCHECK_EQ(output, outputs_[0]); + StopProducing(); + } + + void StopProducing() override { + std::unique_lock lock(mutex_); + stop_requested_ = true; + } + + Future<> finished() override { return finished_; } + + private: + std::mutex mutex_; + bool stop_requested_{false}; + int batch_count_{0}; + Future<> finished_ = Future<>::MakeFinished(); + AsyncGenerator> generator_; +}; + +ExecNode* MakeSourceNode(ExecPlan* plan, std::string label, + std::shared_ptr output_schema, + AsyncGenerator> generator) { + return plan->EmplaceNode(plan, std::move(label), std::move(output_schema), + std::move(generator)); +} + +struct FilterNode : ExecNode { + 
FilterNode(ExecNode* input, std::string label, Expression filter) + : ExecNode(input->plan(), std::move(label), {input}, {"target"}, + /*output_schema=*/input->output_schema(), + /*num_outputs=*/1), + filter_(std::move(filter)) {} + + const char* kind_name() override { return "FilterNode"; } + + Result DoFilter(const ExecBatch& target) { + ARROW_ASSIGN_OR_RAISE(Expression simplified_filter, + SimplifyWithGuarantee(filter_, target.guarantee)); + + ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target, + plan()->exec_context())); + + if (mask.is_scalar()) { + const auto& mask_scalar = mask.scalar_as(); + if (mask_scalar.is_valid && mask_scalar.value) { + return target; + } + + return target.Slice(0, 0); + } + + // if the values are all scalar then the mask must also be + DCHECK(!std::all_of(target.values.begin(), target.values.end(), + [](const Datum& value) { return value.is_scalar(); })); + + auto values = target.values; + for (auto& value : values) { + if (value.is_scalar()) continue; + ARROW_ASSIGN_OR_RAISE(value, Filter(value, mask, FilterOptions::Defaults())); + } + return ExecBatch::Make(std::move(values)); + } + + void InputReceived(ExecNode* input, int seq, ExecBatch batch) override { + DCHECK_EQ(input, inputs_[0]); + + auto maybe_filtered = DoFilter(std::move(batch)); + if (ErrorIfNotOk(maybe_filtered.status())) return; + + maybe_filtered->guarantee = batch.guarantee; + outputs_[0]->InputReceived(this, seq, maybe_filtered.MoveValueUnsafe()); + } + + void ErrorReceived(ExecNode* input, Status error) override { + DCHECK_EQ(input, inputs_[0]); + outputs_[0]->ErrorReceived(this, std::move(error)); + } + + void InputFinished(ExecNode* input, int seq) override { + DCHECK_EQ(input, inputs_[0]); + outputs_[0]->InputFinished(this, seq); + } + + Status StartProducing() override { return Status::OK(); } + + void PauseProducing(ExecNode* output) override {} + + void ResumeProducing(ExecNode* output) override {} + + void 
StopProducing(ExecNode* output) override { + DCHECK_EQ(output, outputs_[0]); + StopProducing(); + } + + void StopProducing() override { inputs_[0]->StopProducing(this); } + + Future<> finished() override { return inputs_[0]->finished(); } + + private: + Expression filter_; +}; + +Result MakeFilterNode(ExecNode* input, std::string label, Expression filter) { + if (!filter.IsBound()) { + ARROW_ASSIGN_OR_RAISE(filter, filter.Bind(*input->output_schema())); + } + + if (filter.type()->id() != Type::BOOL) { + return Status::TypeError("Filter expression must evaluate to bool, but ", + filter.ToString(), " evaluates to ", + filter.type()->ToString()); + } + + return input->plan()->EmplaceNode(input, std::move(label), + std::move(filter)); +} + +struct ProjectNode : ExecNode { + ProjectNode(ExecNode* input, std::string label, std::shared_ptr output_schema, + std::vector exprs) + : ExecNode(input->plan(), std::move(label), {input}, {"target"}, + /*output_schema=*/std::move(output_schema), + /*num_outputs=*/1), + exprs_(std::move(exprs)) {} + + const char* kind_name() override { return "ProjectNode"; } + + Result DoProject(const ExecBatch& target) { + std::vector values{exprs_.size()}; + for (size_t i = 0; i < exprs_.size(); ++i) { + ARROW_ASSIGN_OR_RAISE(Expression simplified_expr, + SimplifyWithGuarantee(exprs_[i], target.guarantee)); + + ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target, + plan()->exec_context())); + } + return ExecBatch{std::move(values), target.length}; + } + + void InputReceived(ExecNode* input, int seq, ExecBatch batch) override { + DCHECK_EQ(input, inputs_[0]); + + auto maybe_projected = DoProject(std::move(batch)); + if (ErrorIfNotOk(maybe_projected.status())) return; + + maybe_projected->guarantee = batch.guarantee; + outputs_[0]->InputReceived(this, seq, maybe_projected.MoveValueUnsafe()); + } + + void ErrorReceived(ExecNode* input, Status error) override { + DCHECK_EQ(input, inputs_[0]); + 
outputs_[0]->ErrorReceived(this, std::move(error)); + } + + void InputFinished(ExecNode* input, int seq) override { + DCHECK_EQ(input, inputs_[0]); + outputs_[0]->InputFinished(this, seq); + } + + Status StartProducing() override { return Status::OK(); } + + void PauseProducing(ExecNode* output) override {} + + void ResumeProducing(ExecNode* output) override {} + + void StopProducing(ExecNode* output) override { + DCHECK_EQ(output, outputs_[0]); + StopProducing(); + } + + void StopProducing() override { inputs_[0]->StopProducing(this); } + + Future<> finished() override { return inputs_[0]->finished(); } + + private: + std::vector exprs_; +}; + +Result MakeProjectNode(ExecNode* input, std::string label, + std::vector exprs, + std::vector names) { + FieldVector fields(exprs.size()); + + if (names.size() == 0) { + names.resize(exprs.size()); + for (size_t i = 0; i < exprs.size(); ++i) { + names[i] = exprs[i].ToString(); + } + } + + int i = 0; + for (auto& expr : exprs) { + if (!expr.IsBound()) { + ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(*input->output_schema())); + } + fields[i] = field(std::move(names[i]), expr.type()); + ++i; + } + + return input->plan()->EmplaceNode( + input, std::move(label), schema(std::move(fields)), std::move(exprs)); +} + +class AtomicCounter { + public: + AtomicCounter() = default; + + int count() const { return count_.load(); } + + util::optional total() const { + int total = total_.load(); + if (total == -1) return {}; + return total; + } + + // return true if the counter is complete + bool Increment() { + DCHECK_NE(count_.load(), total_.load()); + int count = count_.fetch_add(1) + 1; + if (count != total_.load()) return false; + return DoneOnce(); + } + + // return true if the counter is complete + bool SetTotal(int total) { + total_.store(total); + if (count_.load() != total) return false; + return DoneOnce(); + } + + // return true if the counter has not already been completed + bool Cancel() { return DoneOnce(); } + + private: + // 
ensure there is only one true return from Increment(), SetTotal(), or Cancel() + bool DoneOnce() { + bool expected = false; + return complete_.compare_exchange_strong(expected, true); + } + + std::atomic count_{0}, total_{-1}; + std::atomic complete_{false}; +}; + +struct SinkNode : ExecNode { + SinkNode(ExecNode* input, std::string label, + AsyncGenerator>* generator) + : ExecNode(input->plan(), std::move(label), {input}, {"collected"}, {}, + /*num_outputs=*/0), + producer_(MakeProducer(generator)) {} + + static PushGenerator>::Producer MakeProducer( + AsyncGenerator>* out_gen) { + PushGenerator> gen; + auto out = gen.producer(); + *out_gen = std::move(gen); + return out; + } + + const char* kind_name() override { return "SinkNode"; } + + Status StartProducing() override { + finished_ = Future<>::Make(); + return Status::OK(); + } + + // sink nodes have no outputs from which to feel backpressure + [[noreturn]] static void NoOutputs() { + Unreachable("no outputs; this should never be called"); + } + [[noreturn]] void ResumeProducing(ExecNode* output) override { NoOutputs(); } + [[noreturn]] void PauseProducing(ExecNode* output) override { NoOutputs(); } + [[noreturn]] void StopProducing(ExecNode* output) override { NoOutputs(); } + + void StopProducing() override { + Finish(); + inputs_[0]->StopProducing(this); + } + + Future<> finished() override { return finished_; } + + void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override { + DCHECK_EQ(input, inputs_[0]); + + bool did_push = producer_.Push(std::move(batch)); + if (!did_push) return; // producer_ was Closed already + + if (auto total = input_counter_.total()) { + DCHECK_LE(seq_num, *total); + } + + if (input_counter_.Increment()) { + Finish(); + } + } + + void ErrorReceived(ExecNode* input, Status error) override { + DCHECK_EQ(input, inputs_[0]); + + producer_.Push(std::move(error)); + + if (input_counter_.Cancel()) { + Finish(); + } + inputs_[0]->StopProducing(this); + } + + void 
InputFinished(ExecNode* input, int seq_stop) override { + if (input_counter_.SetTotal(seq_stop)) { + Finish(); + } + } + + private: + void Finish() { + if (producer_.Close()) { + finished_.MarkFinished(); + } + } + + AtomicCounter input_counter_; + Future<> finished_ = Future<>::MakeFinished(); + + PushGenerator>::Producer producer_; +}; + +AsyncGenerator> MakeSinkNode(ExecNode* input, + std::string label) { + AsyncGenerator> out; + (void)input->plan()->EmplaceNode(input, std::move(label), &out); + return out; +} + +std::shared_ptr MakeGeneratorReader( + std::shared_ptr schema, + std::function>()> gen, MemoryPool* pool) { + struct Impl : RecordBatchReader { + std::shared_ptr schema() const override { return schema_; } + + Status ReadNext(std::shared_ptr* record_batch) override { + ARROW_ASSIGN_OR_RAISE(auto batch, iterator_.Next()); + if (batch) { + ARROW_ASSIGN_OR_RAISE(*record_batch, batch->ToRecordBatch(schema_, pool_)); + } else { + *record_batch = IterationEnd>(); + } + return Status::OK(); + } + + MemoryPool* pool_; + std::shared_ptr schema_; + Iterator> iterator_; + }; + + auto out = std::make_shared(); + out->pool_ = pool; + out->schema_ = std::move(schema); + out->iterator_ = MakeGeneratorIterator(std::move(gen)); + return out; +} + +class ThreadIndexer { + public: + size_t operator()() { + auto id = std::this_thread::get_id(); + + std::unique_lock lock(mutex_); + const auto& id_index = *id_to_index_.emplace(id, id_to_index_.size()).first; + + return Check(id_index.second); + } + + static size_t Capacity() { + static size_t max_size = arrow::internal::ThreadPool::DefaultCapacity(); + return max_size; + } + + private: + size_t Check(size_t thread_index) { + DCHECK_LT(thread_index, Capacity()) << "thread index " << thread_index + << " is out of range [0, " << Capacity() << ")"; + + return thread_index; + } + + std::mutex mutex_; + std::unordered_map id_to_index_; +}; + +struct ScalarAggregateNode : ExecNode { + ScalarAggregateNode(ExecNode* input, 
std::string label, + std::shared_ptr output_schema, + std::vector kernels, + std::vector argument_indices, + std::vector>> states) + : ExecNode(input->plan(), std::move(label), {input}, {"target"}, + /*output_schema=*/std::move(output_schema), + /*num_outputs=*/1), + kernels_(std::move(kernels)), + argument_indices_(std::move(argument_indices)), + states_(std::move(states)) {} + + const char* kind_name() override { return "ScalarAggregateNode"; } + + Status DoConsume(const ExecBatch& batch, size_t thread_index) { + for (size_t i = 0; i < kernels_.size(); ++i) { + KernelContext batch_ctx{plan()->exec_context()}; + batch_ctx.SetState(states_[i][thread_index].get()); + + ExecBatch single_column_batch{{batch[argument_indices_[i]]}, batch.length}; + RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch)); + } + return Status::OK(); + } + + void InputReceived(ExecNode* input, int seq, ExecBatch batch) override { + DCHECK_EQ(input, inputs_[0]); + + auto thread_index = get_thread_index_(); + + if (ErrorIfNotOk(DoConsume(std::move(batch), thread_index))) return; + + if (input_counter_.Increment()) { + ErrorIfNotOk(Finish()); + } + } + + void ErrorReceived(ExecNode* input, Status error) override { + DCHECK_EQ(input, inputs_[0]); + outputs_[0]->ErrorReceived(this, std::move(error)); + } + + void InputFinished(ExecNode* input, int num_total) override { + DCHECK_EQ(input, inputs_[0]); + + if (input_counter_.SetTotal(num_total)) { + ErrorIfNotOk(Finish()); + } + } + + Status StartProducing() override { + finished_ = Future<>::Make(); + // Scalar aggregates will only output a single batch + outputs_[0]->InputFinished(this, 1); + return Status::OK(); + } + + void PauseProducing(ExecNode* output) override {} + + void ResumeProducing(ExecNode* output) override {} + + void StopProducing(ExecNode* output) override { + DCHECK_EQ(output, outputs_[0]); + StopProducing(); + } + + void StopProducing() override { + if (input_counter_.Cancel()) { + finished_.MarkFinished(); + } 
+ inputs_[0]->StopProducing(this); + } + + Future<> finished() override { return finished_; } + + private: + Status Finish() { + ExecBatch batch{{}, 1}; + batch.values.resize(kernels_.size()); + + for (size_t i = 0; i < kernels_.size(); ++i) { + KernelContext ctx{plan()->exec_context()}; + ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll( + kernels_[i], &ctx, std::move(states_[i]))); + RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i])); + } + + outputs_[0]->InputReceived(this, 0, std::move(batch)); + finished_.MarkFinished(); + return Status::OK(); + } + + Future<> finished_ = Future<>::MakeFinished(); + const std::vector kernels_; + const std::vector argument_indices_; + + std::vector>> states_; + + ThreadIndexer get_thread_index_; + AtomicCounter input_counter_; +}; + +Result MakeScalarAggregateNode(ExecNode* input, std::string label, + std::vector aggregates, + std::vector arguments, + std::vector out_field_names) { + if (aggregates.size() != arguments.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + arguments.size(), " arguments."); + } + + if (aggregates.size() != out_field_names.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + out_field_names.size(), " field names for the output."); + } + + auto exec_ctx = input->plan()->exec_context(); + + std::vector kernels(aggregates.size()); + std::vector>> states(kernels.size()); + FieldVector fields(kernels.size()); + std::vector argument_indices(kernels.size()); + + for (size_t i = 0; i < kernels.size(); ++i) { + if (!arguments[i].IsName()) { + return Status::NotImplemented("Non name field refs"); + } + ARROW_ASSIGN_OR_RAISE(auto match, + arguments[i].FindOneOrNone(*input->output_schema())); + argument_indices[i] = match[0]; + + ARROW_ASSIGN_OR_RAISE(auto function, + exec_ctx->func_registry()->GetFunction(aggregates[i].function)); + + if (function->kind() != Function::SCALAR_AGGREGATE) { + return 
Status::Invalid("Provided non ScalarAggregateFunction ", + aggregates[i].function); + } + + auto in_type = ValueDescr::Array(input->output_schema()->fields()[i]->type()); + + ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type})); + kernels[i] = static_cast(kernel); + + if (aggregates[i].options == nullptr) { + aggregates[i].options = function->default_options(); + } + + KernelContext kernel_ctx{exec_ctx}; + states[i].resize(ThreadIndexer::Capacity()); + RETURN_NOT_OK(Kernel::InitAll(&kernel_ctx, + KernelInitArgs{kernels[i], + { + in_type, + }, + aggregates[i].options}, + &states[i])); + + // pick one to resolve the kernel signature + kernel_ctx.SetState(states[i][0].get()); + ARROW_ASSIGN_OR_RAISE( + auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); + + fields[i] = field(std::move(out_field_names[i]), std::move(descr.type)); + } + + return input->plan()->EmplaceNode( + input, std::move(label), schema(std::move(fields)), std::move(kernels), + std::move(argument_indices), std::move(states)); +} + +namespace internal { + +Result> GetKernels( + ExecContext* ctx, const std::vector& aggregates, + const std::vector& in_descrs); + +Result>> InitKernels( + const std::vector& kernels, ExecContext* ctx, + const std::vector& aggregates, + const std::vector& in_descrs); + +Result ResolveKernels( + const std::vector& aggregates, + const std::vector& kernels, + const std::vector>& states, ExecContext* ctx, + const std::vector& descrs); + +} // namespace internal + +struct GroupByNode : ExecNode { + GroupByNode(ExecNode* input, std::string label, std::shared_ptr output_schema, + ExecContext* ctx, const std::vector&& key_field_ids, + const std::vector&& agg_src_field_ids, + const std::vector&& aggs, + const std::vector&& agg_kernels) + : ExecNode(input->plan(), std::move(label), {input}, {"groupby"}, + std::move(output_schema), /*num_outputs=*/1), + ctx_(ctx), + key_field_ids_(std::move(key_field_ids)), + 
agg_src_field_ids_(std::move(agg_src_field_ids)), + aggs_(std::move(aggs)), + agg_kernels_(std::move(agg_kernels)) {} + + const char* kind_name() override { return "GroupByNode"; } + + Status Consume(ExecBatch batch) { + size_t thread_index = get_thread_index_(); + if (thread_index >= local_states_.size()) { + return Status::IndexError("thread index ", thread_index, " is out of range [0, ", + local_states_.size(), ")"); + } + + auto state = &local_states_[thread_index]; + RETURN_NOT_OK(InitLocalStateIfNeeded(state)); + + // Create a batch with key columns + std::vector keys(key_field_ids_.size()); + for (size_t i = 0; i < key_field_ids_.size(); ++i) { + keys[i] = batch.values[key_field_ids_[i]]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch key_batch, ExecBatch::Make(keys)); + + // Create a batch with group ids + ARROW_ASSIGN_OR_RAISE(Datum id_batch, state->grouper->Consume(key_batch)); + + // Execute aggregate kernels + for (size_t i = 0; i < agg_kernels_.size(); ++i) { + KernelContext kernel_ctx{ctx_}; + kernel_ctx.SetState(state->agg_states[i].get()); + + ARROW_ASSIGN_OR_RAISE( + auto agg_batch, + ExecBatch::Make({batch.values[agg_src_field_ids_[i]], id_batch})); + + RETURN_NOT_OK(agg_kernels_[i]->resize(&kernel_ctx, state->grouper->num_groups())); + RETURN_NOT_OK(agg_kernels_[i]->consume(&kernel_ctx, agg_batch)); + } + + return Status::OK(); + } + + Status Merge() { + ThreadLocalState* state0 = &local_states_[0]; + for (size_t i = 1; i < local_states_.size(); ++i) { + ThreadLocalState* state = &local_states_[i]; + if (!state->grouper) { + continue; + } + + ARROW_ASSIGN_OR_RAISE(ExecBatch other_keys, state->grouper->GetUniques()); + ARROW_ASSIGN_OR_RAISE(Datum transposition, state0->grouper->Consume(other_keys)); + state->grouper.reset(); + + for (size_t i = 0; i < agg_kernels_.size(); ++i) { + KernelContext batch_ctx{ctx_}; + DCHECK(state0->agg_states[i]); + batch_ctx.SetState(state0->agg_states[i].get()); + + RETURN_NOT_OK(agg_kernels_[i]->resize(&batch_ctx, 
state0->grouper->num_groups())); + RETURN_NOT_OK(agg_kernels_[i]->merge(&batch_ctx, std::move(*state->agg_states[i]), + *transposition.array())); + state->agg_states[i].reset(); + } + } + return Status::OK(); + } + + Result Finalize() { + ThreadLocalState* state = &local_states_[0]; + + ExecBatch out_data{{}, state->grouper->num_groups()}; + out_data.values.resize(agg_kernels_.size() + key_field_ids_.size()); + + // Aggregate fields come before key fields to match the behavior of GroupBy function + for (size_t i = 0; i < agg_kernels_.size(); ++i) { + KernelContext batch_ctx{ctx_}; + batch_ctx.SetState(state->agg_states[i].get()); + RETURN_NOT_OK(agg_kernels_[i]->finalize(&batch_ctx, &out_data.values[i])); + state->agg_states[i].reset(); + } + + ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, state->grouper->GetUniques()); + std::move(out_keys.values.begin(), out_keys.values.end(), + out_data.values.begin() + agg_kernels_.size()); + state->grouper.reset(); + + if (output_counter_.SetTotal( + static_cast(BitUtil::CeilDiv(out_data.length, output_batch_size())))) { + // this will be hit if out_data.length == 0 + finished_.MarkFinished(); + } + return out_data; + } + + void OutputNthBatch(int n) { + // bail if StopProducing was called + if (finished_.is_finished()) return; + + int64_t batch_size = output_batch_size(); + outputs_[0]->InputReceived(this, n, out_data_.Slice(batch_size * n, batch_size)); + + if (output_counter_.Increment()) { + finished_.MarkFinished(); + } + } + + Status OutputResult() { + RETURN_NOT_OK(Merge()); + ARROW_ASSIGN_OR_RAISE(out_data_, Finalize()); + + int num_output_batches = *output_counter_.total(); + outputs_[0]->InputFinished(this, num_output_batches); + + auto executor = ctx_->executor(); + for (int i = 0; i < num_output_batches; ++i) { + if (executor) { + // bail if StopProducing was called + if (finished_.is_finished()) break; + + RETURN_NOT_OK(executor->Spawn([this, i] { OutputNthBatch(i); })); + } else { + OutputNthBatch(i); + } + } + + 
return Status::OK(); + } + + void InputReceived(ExecNode* input, int seq, ExecBatch batch) override { + // bail if StopProducing was called + if (finished_.is_finished()) return; + + DCHECK_EQ(input, inputs_[0]); + + if (ErrorIfNotOk(Consume(std::move(batch)))) return; + + if (input_counter_.Increment()) { + ErrorIfNotOk(OutputResult()); + } + } + + void ErrorReceived(ExecNode* input, Status error) override { + DCHECK_EQ(input, inputs_[0]); + + outputs_[0]->ErrorReceived(this, std::move(error)); + } + + void InputFinished(ExecNode* input, int num_total) override { + // bail if StopProducing was called + if (finished_.is_finished()) return; + + DCHECK_EQ(input, inputs_[0]); + + if (input_counter_.SetTotal(num_total)) { + ErrorIfNotOk(OutputResult()); + } + } + + Status StartProducing() override { + finished_ = Future<>::Make(); + + local_states_.resize(ThreadIndexer::Capacity()); + return Status::OK(); + } + + void PauseProducing(ExecNode* output) override {} + + void ResumeProducing(ExecNode* output) override {} + + void StopProducing(ExecNode* output) override { + DCHECK_EQ(output, outputs_[0]); + + if (input_counter_.Cancel()) { + finished_.MarkFinished(); + } else if (output_counter_.Cancel()) { + finished_.MarkFinished(); + } + inputs_[0]->StopProducing(this); + } + + void StopProducing() override { StopProducing(outputs_[0]); } + + Future<> finished() override { return finished_; } + + private: + struct ThreadLocalState { + std::unique_ptr grouper; + std::vector> agg_states; + }; + + ThreadLocalState* GetLocalState() { + size_t thread_index = get_thread_index_(); + return &local_states_[thread_index]; + } + + Status InitLocalStateIfNeeded(ThreadLocalState* state) { + // Get input schema + auto input_schema = inputs_[0]->output_schema(); + + if (state->grouper != nullptr) return Status::OK(); + + // Build vector of key field data types + std::vector key_descrs(key_field_ids_.size()); + for (size_t i = 0; i < key_field_ids_.size(); ++i) { + auto key_field_id = 
key_field_ids_[i]; + key_descrs[i] = ValueDescr(input_schema->field(key_field_id)->type()); + } + + // Construct grouper + ARROW_ASSIGN_OR_RAISE(state->grouper, internal::Grouper::Make(key_descrs, ctx_)); + + // Build vector of aggregate source field data types + std::vector agg_src_descrs(agg_kernels_.size()); + for (size_t i = 0; i < agg_kernels_.size(); ++i) { + auto agg_src_field_id = agg_src_field_ids_[i]; + agg_src_descrs[i] = + ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + } + + ARROW_ASSIGN_OR_RAISE( + state->agg_states, + internal::InitKernels(agg_kernels_, ctx_, aggs_, agg_src_descrs)); + + return Status::OK(); + } + + int output_batch_size() const { + int result = static_cast(ctx_->exec_chunksize()); + if (result < 0) { + result = 32 * 1024; + } + return result; + } + + ExecContext* ctx_; + Future<> finished_ = Future<>::MakeFinished(); + + const std::vector key_field_ids_; + const std::vector agg_src_field_ids_; + const std::vector aggs_; + const std::vector agg_kernels_; + + ThreadIndexer get_thread_index_; + AtomicCounter input_counter_, output_counter_; + + std::vector local_states_; + ExecBatch out_data_; +}; + +Result MakeGroupByNode(ExecNode* input, std::string label, + std::vector keys, + std::vector agg_srcs, + std::vector aggs) { + // Get input schema + auto input_schema = input->output_schema(); + + // Find input field indices for key fields + std::vector key_field_ids(keys.size()); + for (size_t i = 0; i < keys.size(); ++i) { + ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(keys[i]).FindOne(*input_schema)); + key_field_ids[i] = match[0]; + } + + // Find input field indices for aggregates + std::vector agg_src_field_ids(aggs.size()); + for (size_t i = 0; i < aggs.size(); ++i) { + ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(agg_srcs[i]).FindOne(*input_schema)); + agg_src_field_ids[i] = match[0]; + } + + // Build vector of aggregate source field data types + DCHECK_EQ(agg_srcs.size(), aggs.size()); + std::vector 
agg_src_descrs(aggs.size()); + for (size_t i = 0; i < aggs.size(); ++i) { + auto agg_src_field_id = agg_src_field_ids[i]; + agg_src_descrs[i] = + ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + } + + auto ctx = input->plan()->exec_context(); + + // Construct aggregates + ARROW_ASSIGN_OR_RAISE(auto agg_kernels, + internal::GetKernels(ctx, aggs, agg_src_descrs)); + + ARROW_ASSIGN_OR_RAISE(auto agg_states, + internal::InitKernels(agg_kernels, ctx, aggs, agg_src_descrs)); + + ARROW_ASSIGN_OR_RAISE( + FieldVector agg_result_fields, + internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_descrs)); + + // Build field vector for output schema + FieldVector output_fields{keys.size() + aggs.size()}; + + // Aggregate fields come before key fields to match the behavior of GroupBy function + for (size_t i = 0; i < aggs.size(); ++i) { + output_fields[i] = agg_result_fields[i]; + } + size_t base = aggs.size(); + for (size_t i = 0; i < keys.size(); ++i) { + int key_field_id = key_field_ids[i]; + output_fields[base + i] = input_schema->field(key_field_id); + } + + auto aggs_copy = aggs; + + return input->plan()->EmplaceNode( + input, std::move(label), schema(std::move(output_fields)), ctx, + std::move(key_field_ids), std::move(agg_src_field_ids), std::move(aggs), + std::move(agg_kernels)); +} + +Result GroupByUsingExecPlan(const std::vector& arguments, + const std::vector& keys, + const std::vector& aggregates, + bool use_threads, ExecContext* ctx) { + using arrow::compute::detail::ExecBatchIterator; + + FieldVector scan_fields(arguments.size() + keys.size()); + std::vector keys_str(keys.size()); + std::vector arguments_str(arguments.size()); + for (size_t i = 0; i < arguments.size(); ++i) { + arguments_str[i] = std::string("agg_") + std::to_string(i); + scan_fields[i] = field(arguments_str[i], arguments[i].type()); + } + for (size_t i = 0; i < keys.size(); ++i) { + keys_str[i] = std::string("key_") + std::to_string(i); + 
scan_fields[arguments.size() + i] = field(keys_str[i], keys[i].type()); + } + + std::vector scan_batches; + std::vector inputs; + for (const auto& argument : arguments) { + inputs.push_back(argument); + } + for (const auto& key : keys) { + inputs.push_back(key); + } + ARROW_ASSIGN_OR_RAISE(auto batch_iterator, + ExecBatchIterator::Make(inputs, ctx->exec_chunksize())); + ExecBatch batch; + while (batch_iterator->Next(&batch)) { + if (batch.length == 0) continue; + scan_batches.push_back(batch); + } + + ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make(ctx)); + auto source = MakeSourceNode( + plan.get(), "source", schema(std::move(scan_fields)), + MakeVectorGenerator(arrow::internal::MapVector( + [](ExecBatch batch) { return util::make_optional(std::move(batch)); }, + std::move(scan_batches)))); + + ARROW_ASSIGN_OR_RAISE( + auto gby, MakeGroupByNode(source, "gby", keys_str, arguments_str, aggregates)); + auto sink_gen = MakeSinkNode(gby, "sink"); + + RETURN_NOT_OK(plan->Validate()); + RETURN_NOT_OK(plan->StartProducing()); + + auto collected_fut = CollectAsyncGenerator(sink_gen); + + auto start_and_collect = + AllComplete({plan->finished(), Future<>(collected_fut)}) + .Then([collected_fut]() -> Result> { + ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result()); + return ::arrow::internal::MapVector( + [](util::optional batch) { return std::move(*batch); }, + std::move(collected)); + }); + + std::vector output_batches = + start_and_collect.MoveResult().MoveValueUnsafe(); + + ArrayDataVector out_data(arguments.size() + keys.size()); + for (size_t i = 0; i < arguments.size() + keys.size(); ++i) { + std::vector> arrays(output_batches.size()); + for (size_t j = 0; j < output_batches.size(); ++j) { + arrays[j] = output_batches[j].values[i].make_array(); + } + ARROW_ASSIGN_OR_RAISE(auto concatenated_array, Concatenate(arrays)); + out_data[i] = concatenated_array->data(); + } + + int64_t length = out_data[0]->length; + return 
ArrayData::Make(struct_(gby->output_schema()->fields()), length, + {/*null_bitmap=*/nullptr}, std::move(out_data), + /*null_count=*/0); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h new file mode 100644 index 00000000000..fc3af92af4a --- /dev/null +++ b/cpp/src/arrow/compute/exec/exec_plan.h @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/type_fwd.h" +#include "arrow/type_fwd.h" +#include "arrow/util/macros.h" +#include "arrow/util/optional.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace compute { + +class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this { + public: + using NodeVector = std::vector; + + virtual ~ExecPlan() = default; + + ExecContext* exec_context() const { return exec_context_; } + + /// Make an empty exec plan + static Result> Make(ExecContext* = default_exec_context()); + + ExecNode* AddNode(std::unique_ptr node); + + template + Node* EmplaceNode(Args&&... 
args) { + std::unique_ptr node{new Node{std::forward(args)...}}; + auto out = node.get(); + AddNode(std::move(node)); + return out; + } + + /// The initial inputs + const NodeVector& sources() const; + + /// The final outputs + const NodeVector& sinks() const; + + Status Validate(); + + /// \brief Start producing on all nodes + /// + /// Nodes are started in reverse topological order, such that any node + /// is started before all of its inputs. + Status StartProducing(); + + /// \brief Stop producing on all nodes + /// + /// Nodes are stopped in topological order, such that any node + /// is stopped before all of its outputs. + void StopProducing(); + + /// \brief A future which will be marked finished when all nodes have stopped producing. + Future<> finished(); + + protected: + ExecContext* exec_context_; + explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {} +}; + +class ARROW_EXPORT ExecNode { + public: + using NodeVector = std::vector; + + virtual ~ExecNode() = default; + + virtual const char* kind_name() = 0; + + // The number of inputs/outputs expected by this node + int num_inputs() const { return static_cast(inputs_.size()); } + int num_outputs() const { return num_outputs_; } + + /// This node's predecessors in the exec plan + const NodeVector& inputs() const { return inputs_; } + + /// \brief Labels identifying the function of each input. + const std::vector& input_labels() const { return input_labels_; } + + /// This node's successors in the exec plan + const NodeVector& outputs() const { return outputs_; } + + /// The datatypes for batches produced by this node + const std::shared_ptr& output_schema() const { return output_schema_; } + + /// This node's exec plan + ExecPlan* plan() { return plan_; } + + /// \brief An optional label, for display and debugging + /// + /// There is no guarantee that this value is non-empty or unique. 
+ const std::string& label() const { return label_; } + + Status Validate() const; + + /// Upstream API: + /// These functions are called by input nodes that want to inform this node + /// about an updated condition (a new input batch, an error, an impending + /// end of stream). + /// + /// Implementation rules: + /// - these may be called anytime after StartProducing() has succeeded + /// (and even during or after StopProducing()) + /// - these may be called concurrently + /// - these are allowed to call back into PauseProducing(), ResumeProducing() + /// and StopProducing() + + /// Transfer input batch to ExecNode + virtual void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) = 0; + + /// Signal error to ExecNode + virtual void ErrorReceived(ExecNode* input, Status error) = 0; + + /// Mark the inputs finished after the given number of batches. + /// + /// This may be called before all inputs are received. This simply fixes + /// the total number of incoming batches for an input, so that the ExecNode + /// knows when it has received all input, regardless of order. 
+ virtual void InputFinished(ExecNode* input, int seq_stop) = 0; + + /// Lifecycle API: + /// - start / stop to initiate and terminate production + /// - pause / resume to apply backpressure + /// + /// Implementation rules: + /// - StartProducing() should not recurse into the inputs, as it is + /// handled by ExecPlan::StartProducing() + /// - PauseProducing(), ResumeProducing(), StopProducing() may be called + /// concurrently (but only after StartProducing() has returned successfully) + /// - PauseProducing(), ResumeProducing(), StopProducing() may be called + /// by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished() + /// methods + /// - StopProducing() should recurse into the inputs + /// - StopProducing() must be idempotent + + // XXX What happens if StartProducing() calls an output's InputReceived() + // synchronously, and InputReceived() decides to call back into StopProducing() + // (or PauseProducing()) because it received enough data? + // + // Right now, since synchronous calls happen in both directions (input to + // output and then output to input), a node must be careful to be reentrant + // against synchronous calls from its output, *and* also concurrent calls from + // other threads. The most reliable solution is to update the internal state + // first, and notify outputs only at the end. + // + // Alternate rules: + // - StartProducing(), ResumeProducing() can call synchronously into + // its outputs' consuming methods (InputReceived() etc.) 
+ // - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously + // into its inputs' PauseProducing(), StopProducing() + // + // Alternate API: + // - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint + // enum: either None (default), PauseProducing, ResumeProducing, StopProducing + // - A method allows passing a ProductionHint asynchronously from an output node + // (replacing PauseProducing(), ResumeProducing(), StopProducing()) + + /// \brief Start producing + /// + /// This must only be called once. If this fails, then other lifecycle + /// methods must not be called. + /// + /// This is typically called automatically by ExecPlan::StartProducing(). + virtual Status StartProducing() = 0; + + /// \brief Pause producing temporarily + /// + /// This call is a hint that an output node is currently not willing + /// to receive data. + /// + /// This may be called any number of times after StartProducing() succeeds. + /// However, the node is still free to produce data (which may be difficult + /// to prevent anyway if data is produced using multiple threads). + virtual void PauseProducing(ExecNode* output) = 0; + + /// \brief Resume producing after a temporary pause + /// + /// This call is a hint that an output node is willing to receive data again. + /// + /// This may be called any number of times after StartProducing() succeeds. + /// This may also be called concurrently with PauseProducing(), which suggests + /// the implementation may use an atomic counter. + virtual void ResumeProducing(ExecNode* output) = 0; + + /// \brief Stop producing definitively to a single output + /// + /// This call is a hint that an output node has completed and is not willing + /// to receive any further data. 
+ virtual void StopProducing(ExecNode* output) = 0; + + /// \brief Stop producing definitively to all outputs + virtual void StopProducing() = 0; + + /// \brief A future which will be marked finished when this node has stopped producing. + virtual Future<> finished() = 0; + + protected: + ExecNode(ExecPlan* plan, std::string label, NodeVector inputs, + std::vector input_labels, std::shared_ptr output_schema, + int num_outputs); + + // A helper method to send an error status to all outputs. + // Returns true if the status was an error. + bool ErrorIfNotOk(Status status); + + ExecPlan* plan_; + std::string label_; + + NodeVector inputs_; + std::vector input_labels_; + + std::shared_ptr output_schema_; + int num_outputs_; + NodeVector outputs_; +}; + +/// \brief Adapt an AsyncGenerator as a source node +/// +/// plan->exec_context()->executor() is used to parallelize pushing to +/// outputs, if provided. +ARROW_EXPORT +ExecNode* MakeSourceNode(ExecPlan* plan, std::string label, + std::shared_ptr output_schema, + std::function>()>); + +/// \brief Add a sink node which forwards to an AsyncGenerator +/// +/// Emitted batches will not be ordered. +ARROW_EXPORT +std::function>()> MakeSinkNode(ExecNode* input, + std::string label); + +/// \brief Wrap an ExecBatch generator in a RecordBatchReader. +/// +/// The RecordBatchReader does not impose any ordering on emitted batches. +ARROW_EXPORT +std::shared_ptr MakeGeneratorReader( + std::shared_ptr, std::function>()>, + MemoryPool*); + +/// \brief Make a node which excludes some rows from batches passed through it +/// +/// The filter Expression will be evaluated against each batch which is pushed to +/// this node. Any rows for which the filter does not evaluate to `true` will be excluded +/// in the batch emitted by this node. +/// +/// If the filter is not already bound, it will be bound against the input's schema. 
+ARROW_EXPORT +Result MakeFilterNode(ExecNode* input, std::string label, Expression filter); + +/// \brief Make a node which executes expressions on input batches, producing new batches. +/// +/// Each expression will be evaluated against each batch which is pushed to +/// this node to produce a corresponding output column. +/// +/// If exprs are not already bound, they will be bound against the input's schema. +/// If names are not provided, the string representations of exprs will be used. +ARROW_EXPORT +Result MakeProjectNode(ExecNode* input, std::string label, + std::vector exprs, + std::vector names = {}); + +ARROW_EXPORT +Result MakeScalarAggregateNode(ExecNode* input, std::string label, + std::vector aggregates, + std::vector arguments, + std::vector out_field_names); + +/// \brief Make a node which groups input rows based on key fields and computes +/// aggregates for each group +ARROW_EXPORT +Result MakeGroupByNode(ExecNode* input, std::string label, + std::vector keys, + std::vector agg_srcs, + std::vector aggs); + +ARROW_EXPORT +Result GroupByUsingExecPlan(const std::vector& arguments, + const std::vector& keys, + const std::vector& aggregates, + bool use_threads, ExecContext* ctx); + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/dataset/expression.cc b/cpp/src/arrow/compute/exec/expression.cc similarity index 77% rename from cpp/src/arrow/dataset/expression.cc rename to cpp/src/arrow/compute/exec/expression.cc index 627477b3038..4aab64a46a4 100644 --- a/cpp/src/arrow/dataset/expression.cc +++ b/cpp/src/arrow/compute/exec/expression.cc @@ -15,19 +15,20 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/dataset/expression.h" +#include "arrow/compute/exec/expression.h" #include #include #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/exec/expression_internal.h" #include "arrow/compute/exec_internal.h" -#include "arrow/dataset/expression_internal.h" +#include "arrow/compute/function_internal.h" #include "arrow/io/memory.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" -#include "arrow/util/atomic_shared_ptr.h" +#include "arrow/util/hash_util.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/optional.h" @@ -39,9 +40,19 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; -namespace dataset { +namespace compute { -Expression::Expression(Call call) : impl_(std::make_shared(std::move(call))) {} +void Expression::Call::ComputeHash() { + hash = std::hash{}(function_name); + for (const auto& arg : arguments) { + arrow::internal::hash_combine(hash, arg.hash()); + } +} + +Expression::Expression(Call call) { + call.ComputeHash(); + impl_ = std::make_shared(std::move(call)); +} Expression::Expression(Datum literal) : impl_(std::make_shared(std::move(literal))) {} @@ -52,7 +63,7 @@ Expression::Expression(Parameter parameter) Expression literal(Datum lit) { return Expression(std::move(lit)); } Expression field_ref(FieldRef ref) { - return Expression(Expression::Parameter{std::move(ref), {}}); + return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, -1}); } Expression call(std::string function, std::vector arguments, @@ -66,8 +77,12 @@ Expression call(std::string function, std::vector arguments, const Datum* Expression::literal() const { return util::get_if(impl_.get()); } +const Expression::Parameter* Expression::parameter() const { + return util::get_if(impl_.get()); +} + const FieldRef* Expression::field_ref() const { - if (auto parameter = util::get_if(impl_.get())) { + if (auto parameter = 
this->parameter()) { return ¶meter->ref; } return nullptr; @@ -84,7 +99,7 @@ ValueDescr Expression::descr() const { return lit->descr(); } - if (auto parameter = util::get_if(impl_.get())) { + if (auto parameter = this->parameter()) { return parameter->descr; } @@ -151,7 +166,7 @@ std::string Expression::ToString() const { return binary(std::move(op)); } - if (auto options = GetProjectOptions(*call)) { + if (auto options = GetMakeStructOptions(*call)) { std::string out = "{"; auto argument = call->arguments.begin(); for (const auto& field_name : options->field_names) { @@ -167,41 +182,14 @@ std::string Expression::ToString() const { out += arg.ToString() + ", "; } - if (call->options == nullptr) { + if (call->options) { + out += call->options->ToString(); + out.resize(out.size() + 1); + } else { out.resize(out.size() - 1); - out.back() = ')'; - return out; } - - if (auto options = GetSetLookupOptions(*call)) { - DCHECK_EQ(options->value_set.kind(), Datum::ARRAY); - out += "value_set=" + options->value_set.make_array()->ToString(); - if (options->skip_nulls) { - out += ", skip_nulls"; - } - return out + ")"; - } - - if (auto options = GetCastOptions(*call)) { - if (options->to_type == nullptr) { - return out + "to_type=)"; - } - out += "to_type=" + options->to_type->ToString(); - if (options->allow_int_overflow) out += ", allow_int_overflow"; - if (options->allow_time_truncate) out += ", allow_time_truncate"; - if (options->allow_time_overflow) out += ", allow_time_overflow"; - if (options->allow_decimal_truncate) out += ", allow_decimal_truncate"; - if (options->allow_float_truncate) out += ", allow_float_truncate"; - if (options->allow_invalid_utf8) out += ", allow_invalid_utf8"; - return out + ")"; - } - - if (auto options = GetStrptimeOptions(*call)) { - return out + "format=" + options->format + - ", unit=" + internal::ToString(options->unit) + ")"; - } - - return out + "{NON-REPRESENTABLE OPTIONS})"; + out.back() = ')'; + return out; } void PrintTo(const 
Expression& expr, std::ostream* os) { @@ -241,41 +229,9 @@ bool Expression::Equals(const Expression& other) const { } if (call->options == other_call->options) return true; - - if (auto options = GetSetLookupOptions(*call)) { - auto other_options = GetSetLookupOptions(*other_call); - return options->value_set == other_options->value_set && - options->skip_nulls == other_options->skip_nulls; - } - - if (auto options = GetCastOptions(*call)) { - auto other_options = GetCastOptions(*other_call); - for (auto safety_opt : { - &compute::CastOptions::allow_int_overflow, - &compute::CastOptions::allow_time_truncate, - &compute::CastOptions::allow_time_overflow, - &compute::CastOptions::allow_decimal_truncate, - &compute::CastOptions::allow_float_truncate, - &compute::CastOptions::allow_invalid_utf8, - }) { - if (options->*safety_opt != other_options->*safety_opt) return false; - } - return options->to_type->Equals(other_options->to_type); + if (call->options && other_call->options) { + return call->options->Equals(other_call->options); } - - if (auto options = GetProjectOptions(*call)) { - auto other_options = GetProjectOptions(*other_call); - return options->field_names == other_options->field_names; - } - - if (auto options = GetStrptimeOptions(*call)) { - auto other_options = GetStrptimeOptions(*other_call); - return options->format == other_options->format && - options->unit == other_options->unit; - } - - ARROW_LOG(WARNING) << "comparing unknown FunctionOptions for function " - << call->function_name; return false; } @@ -293,20 +249,7 @@ size_t Expression::hash() const { return ref->hash(); } - auto call = CallNotNull(*this); - if (call->hash != nullptr) { - return call->hash->load(); - } - - size_t out = std::hash{}(call->function_name); - for (const auto& arg : call->arguments) { - out ^= arg.hash(); - } - - std::shared_ptr> expected = nullptr; - internal::atomic_compare_exchange_strong(&const_cast(call)->hash, &expected, - std::make_shared>(out)); - return out; + 
return CallNotNull(*this)->hash; } bool Expression::IsBound() const { @@ -427,10 +370,10 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ compute::KernelContext kernel_context(exec_context); if (call.kernel->init) { - call.kernel_state = - call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()}); + ARROW_ASSIGN_OR_RAISE( + call.kernel_state, + call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()})); - RETURN_NOT_OK(kernel_context.status()); kernel_context.SetState(call.kernel_state.get()); } @@ -440,76 +383,113 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ return Expression(std::move(call)); } -struct FieldPathGetDatumImpl { - template ()))> - Result operator()(const std::shared_ptr& ptr) { - return path_.Get(*ptr).template As(); - } - - template - Result operator()(const T&) { - return Status::NotImplemented("FieldPath::Get() into Datum ", datum_.ToString()); +template +Result BindImpl(Expression expr, const TypeOrSchema& in, + ValueDescr::Shape shape, compute::ExecContext* exec_context) { + if (exec_context == nullptr) { + compute::ExecContext exec_context; + return BindImpl(std::move(expr), in, shape, &exec_context); } - const Datum& datum_; - const FieldPath& path_; -}; + if (expr.literal()) return expr; -inline Result GetDatumField(const FieldRef& ref, const Datum& input) { - Datum field; + if (auto ref = expr.field_ref()) { + if (ref->IsNested()) { + return Status::NotImplemented("nested field references"); + } - FieldPath match; - if (auto type = input.type()) { - ARROW_ASSIGN_OR_RAISE(match, ref.FindOneOrNone(*type)); - } else if (auto schema = input.schema()) { - ARROW_ASSIGN_OR_RAISE(match, ref.FindOneOrNone(*schema)); - } else { - return Status::NotImplemented("retrieving fields from datum ", input.ToString()); - } + ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); - if (!match.empty()) { - ARROW_ASSIGN_OR_RAISE(field, - 
util::visit(FieldPathGetDatumImpl{input, match}, input.value)); + auto bound = *expr.parameter(); + bound.index = path[0]; + ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in)); + bound.descr.type = field->type(); + bound.descr.shape = shape; + return Expression{std::move(bound)}; } - if (field == Datum{}) { - return Datum(std::make_shared()); + auto call = *CallNotNull(expr); + for (auto& argument : call.arguments) { + ARROW_ASSIGN_OR_RAISE(argument, + BindImpl(std::move(argument), in, shape, exec_context)); } - - return field; + return BindNonRecursive(std::move(call), + /*insert_implicit_casts=*/true, exec_context); } } // namespace -Result Expression::Bind(ValueDescr in, +Result Expression::Bind(const ValueDescr& in, compute::ExecContext* exec_context) const { - if (exec_context == nullptr) { - compute::ExecContext exec_context; - return Bind(std::move(in), &exec_context); - } + return BindImpl(*this, *in.type, in.shape, exec_context); +} - if (literal()) return *this; +Result Expression::Bind(const Schema& in_schema, + compute::ExecContext* exec_context) const { + return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context); +} - if (auto ref = field_ref()) { - ARROW_ASSIGN_OR_RAISE(auto field, ref->GetOneOrNone(*in.type)); - auto descr = field ? ValueDescr{field->type(), in.shape} : ValueDescr::Scalar(null()); - return Expression{Parameter{*ref, std::move(descr)}}; +Result MakeExecBatch(const Schema& full_schema, const Datum& partial) { + ExecBatch out; + + if (partial.kind() == Datum::RECORD_BATCH) { + const auto& partial_batch = *partial.record_batch(); + out.length = partial_batch.num_rows(); + + for (const auto& field : full_schema.fields()) { + ARROW_ASSIGN_OR_RAISE(auto column, + FieldRef(field->name()).GetOneOrNone(partial_batch)); + + if (column) { + if (!column->type()->Equals(field->type())) { + // Referenced field was present but didn't have the expected type. + // This *should* be handled by readers, and will just be an error in the future. 
+ ARROW_ASSIGN_OR_RAISE( + auto converted, + compute::Cast(column, field->type(), compute::CastOptions::Safe())); + column = converted.make_array(); + } + out.values.emplace_back(std::move(column)); + } else { + out.values.emplace_back(MakeNullScalar(field->type())); + } + } + return out; } - auto call = *CallNotNull(*this); - for (auto& argument : call.arguments) { - ARROW_ASSIGN_OR_RAISE(argument, argument.Bind(in, exec_context)); + // wasteful but useful for testing: + if (partial.type()->id() == Type::STRUCT) { + if (partial.is_array()) { + ARROW_ASSIGN_OR_RAISE(auto partial_batch, + RecordBatch::FromStructArray(partial.make_array())); + + return MakeExecBatch(full_schema, partial_batch); + } + + if (partial.is_scalar()) { + ARROW_ASSIGN_OR_RAISE(auto partial_array, + MakeArrayFromScalar(*partial.scalar(), 1)); + ARROW_ASSIGN_OR_RAISE(auto out, MakeExecBatch(full_schema, partial_array)); + + for (Datum& value : out.values) { + if (value.is_scalar()) continue; + ARROW_ASSIGN_OR_RAISE(value, value.make_array()->GetScalar(0)); + } + return out; + } } - return BindNonRecursive(std::move(call), - /*insert_implicit_casts=*/true, exec_context); + + return Status::NotImplemented("MakeExecBatch from ", PrintDatum(partial)); } -Result Expression::Bind(const Schema& in_schema, - compute::ExecContext* exec_context) const { - return Bind(ValueDescr::Array(struct_(in_schema.fields())), exec_context); +Result ExecuteScalarExpression(const Expression& expr, const Schema& full_schema, + const Datum& partial_input, + compute::ExecContext* exec_context) { + ARROW_ASSIGN_OR_RAISE(auto input, MakeExecBatch(full_schema, partial_input)); + return ExecuteScalarExpression(expr, input, exec_context); } -Result ExecuteScalarExpression(const Expression& expr, const Datum& input, +Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& input, compute::ExecContext* exec_context) { if (exec_context == nullptr) { compute::ExecContext exec_context; @@ -527,15 +507,16 @@ Result 
ExecuteScalarExpression(const Expression& expr, const Datum& input if (auto lit = expr.literal()) return *lit; - if (auto ref = expr.field_ref()) { - ARROW_ASSIGN_OR_RAISE(Datum field, GetDatumField(*ref, input)); + if (auto param = expr.parameter()) { + if (param->descr.type->id() == Type::NA) { + return MakeNullScalar(null()); + } - if (field.descr() != expr.descr()) { - // Refernced field was present but didn't have the expected type. - // Should we just error here? For now, pay dispatch cost and just cast. - ARROW_ASSIGN_OR_RAISE( - field, - compute::Cast(field, expr.type(), compute::CastOptions::Safe(), exec_context)); + const Datum& field = input[param->index]; + if (!field.type()->Equals(param->descr.type)) { + return Status::Invalid("Referenced field ", expr.ToString(), " was ", + field.type()->ToString(), " but should have been ", + param->descr.type->ToString()); } return field; @@ -612,6 +593,17 @@ std::vector FieldsInExpression(const Expression& expr) { return fields; } +bool ExpressionHasFieldRefs(const Expression& expr) { + if (expr.literal()) return false; + + if (expr.field_ref()) return true; + + for (const Expression& arg : CallNotNull(expr)->arguments) { + if (ExpressionHasFieldRefs(arg)) return true; + } + return false; +} + Result FoldConstants(Expression expr) { return Modify( std::move(expr), [](Expression expr) { return expr; }, @@ -620,7 +612,7 @@ Result FoldConstants(Expression expr) { if (std::all_of(call->arguments.begin(), call->arguments.end(), [](const Expression& argument) { return argument.literal(); })) { // all arguments are literal; we can evaluate this subexpression *now* - static const Datum ignored_input = Datum{}; + static const ExecBatch ignored_input = ExecBatch{}; ARROW_ASSIGN_OR_RAISE(Datum constant, ExecuteScalarExpression(expr, ignored_input)); @@ -729,17 +721,16 @@ Status ExtractKnownFieldValuesImpl( } // namespace -Result> ExtractKnownFieldValues( +Result ExtractKnownFieldValues( const Expression& 
guaranteed_true_predicate) { auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate); - std::unordered_map known_values; - RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values)); + KnownFieldValues known_values; + RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map)); return known_values; } -Result ReplaceFieldsWithKnownValues( - const std::unordered_map& known_values, - Expression expr) { +Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values, + Expression expr) { if (!expr.IsBound()) { return Status::Invalid( "ReplaceFieldsWithKnownValues called on an unbound Expression"); @@ -749,8 +740,8 @@ Result ReplaceFieldsWithKnownValues( std::move(expr), [&known_values](Expression expr) -> Result { if (auto ref = expr.field_ref()) { - auto it = known_values.find(*ref); - if (it != known_values.end()) { + auto it = known_values.map.find(*ref); + if (it != known_values.map.end()) { Datum lit = it->second; if (lit.descr() == expr.descr()) return literal(std::move(lit)); // type mismatch, try casting the known value to the correct type @@ -952,8 +943,8 @@ Result SimplifyWithGuarantee(Expression expr, const Expression& guaranteed_true_predicate) { auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate); - std::unordered_map known_values; - RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values)); + KnownFieldValues known_values; + RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map)); ARROW_ASSIGN_OR_RAISE(expr, ReplaceFieldsWithKnownValues(known_values, std::move(expr))); @@ -980,92 +971,6 @@ Result SimplifyWithGuarantee(Expression expr, return expr; } -namespace { - -Result> FunctionOptionsToStructScalar( - const Expression::Call& call) { - if (call.options == nullptr) { - return nullptr; - } - - if (auto options = GetSetLookupOptions(call)) { - if (!options->value_set.is_array()) { - return 
Status::NotImplemented("chunked value_set"); - } - return StructScalar::Make( - { - std::make_shared(options->value_set.make_array()), - MakeScalar(options->skip_nulls), - }, - {"value_set", "skip_nulls"}); - } - - if (auto options = GetCastOptions(call)) { - return StructScalar::Make( - { - MakeNullScalar(options->to_type), - MakeScalar(options->allow_int_overflow), - MakeScalar(options->allow_time_truncate), - MakeScalar(options->allow_time_overflow), - MakeScalar(options->allow_decimal_truncate), - MakeScalar(options->allow_float_truncate), - MakeScalar(options->allow_invalid_utf8), - }, - { - "to_type_holder", - "allow_int_overflow", - "allow_time_truncate", - "allow_time_overflow", - "allow_decimal_truncate", - "allow_float_truncate", - "allow_invalid_utf8", - }); - } - - return Status::NotImplemented("conversion of options for ", call.function_name); -} - -Status FunctionOptionsFromStructScalar(const StructScalar* repr, Expression::Call* call) { - if (repr == nullptr) { - call->options = nullptr; - return Status::OK(); - } - - if (IsSetLookup(call->function_name)) { - ARROW_ASSIGN_OR_RAISE(auto value_set, repr->field("value_set")); - ARROW_ASSIGN_OR_RAISE(auto skip_nulls, repr->field("skip_nulls")); - call->options = std::make_shared( - checked_cast(*value_set).value, - checked_cast(*skip_nulls).value); - return Status::OK(); - } - - if (call->function_name == "cast") { - auto options = std::make_shared(); - ARROW_ASSIGN_OR_RAISE(auto to_type_holder, repr->field("to_type_holder")); - options->to_type = to_type_holder->type; - - int i = 1; - for (bool* opt : { - &options->allow_int_overflow, - &options->allow_time_truncate, - &options->allow_time_overflow, - &options->allow_decimal_truncate, - &options->allow_float_truncate, - &options->allow_invalid_utf8, - }) { - *opt = checked_cast(*repr->value[i++]).value; - } - - call->options = std::move(options); - return Status::OK(); - } - - return Status::NotImplemented("conversion of options for ", 
call->function_name); -} - -} // namespace - // Serialization is accomplished by converting expressions to KeyValueMetadata and storing // this in the schema of a RecordBatch. Embedded arrays and scalars are stored in its // columns. Finally, the RecordBatch is written to an IPC file. @@ -1107,7 +1012,8 @@ Result> Serialize(const Expression& expr) { } if (call->options) { - ARROW_ASSIGN_OR_RAISE(auto options_scalar, FunctionOptionsToStructScalar(*call)); + ARROW_ASSIGN_OR_RAISE(auto options_scalar, + internal::FunctionOptionsToStructScalar(*call->options)); ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*options_scalar)); metadata_->Append("options", std::move(value)); } @@ -1156,7 +1062,8 @@ Result Deserialize(std::shared_ptr buffer) { Result> GetScalar(const std::string& i) { int32_t column_index; - if (!internal::ParseValue(i.data(), i.length(), &column_index)) { + if (!::arrow::internal::ParseValue(i.data(), i.length(), + &column_index)) { return Status::Invalid("Couldn't parse column_index"); } if (column_index >= batch_.num_columns()) { @@ -1191,10 +1098,13 @@ Result Deserialize(std::shared_ptr buffer) { while (metadata().key(index_) != "end") { if (metadata().key(index_) == "options") { ARROW_ASSIGN_OR_RAISE(auto options_scalar, GetScalar(metadata().value(index_))); - auto expr = call(value, std::move(arguments)); - RETURN_NOT_OK(FunctionOptionsFromStructScalar( - checked_cast(options_scalar.get()), - const_cast(expr.call()))); + std::shared_ptr options; + if (options_scalar) { + ARROW_ASSIGN_OR_RAISE( + options, internal::FunctionOptionsFromStructScalar( + checked_cast(*options_scalar))); + } + auto expr = call(value, std::move(arguments), std::move(options)); index_ += 2; return expr; } @@ -1212,7 +1122,8 @@ Result Deserialize(std::shared_ptr buffer) { } Expression project(std::vector values, std::vector names) { - return call("project", std::move(values), compute::ProjectOptions{std::move(names)}); + return call("make_struct", std::move(values), + 
compute::MakeStructOptions{std::move(names)}); } Expression equal(Expression lhs, Expression rhs) { @@ -1271,13 +1182,5 @@ Expression or_(const std::vector& operands) { Expression not_(Expression operand) { return call("invert", {std::move(operand)}); } -Expression operator&&(Expression lhs, Expression rhs) { - return and_(std::move(lhs), std::move(rhs)); -} - -Expression operator||(Expression lhs, Expression rhs) { - return or_(std::move(lhs), std::move(rhs)); -} - -} // namespace dataset +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h new file mode 100644 index 00000000000..3810accf70a --- /dev/null +++ b/cpp/src/arrow/compute/exec/expression.h @@ -0,0 +1,269 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This API is EXPERIMENTAL. + +#pragma once + +#include +#include +#include +#include + +#include "arrow/compute/type_fwd.h" +#include "arrow/datum.h" +#include "arrow/type_fwd.h" +#include "arrow/util/variant.h" + +namespace arrow { +namespace compute { + +/// An unbound expression which maps a single Datum to another Datum. +/// An expression is one of +/// - A literal Datum. 
+/// - A reference to a single (potentially nested) field of the input Datum. +/// - A call to a compute function, with arguments specified by other Expressions. +class ARROW_EXPORT Expression { + public: + struct Call { + std::string function_name; + std::vector arguments; + std::shared_ptr options; + // Cached hash value + size_t hash; + + // post-Bind properties: + std::shared_ptr function; + const Kernel* kernel = NULLPTR; + std::shared_ptr kernel_state; + ValueDescr descr; + + void ComputeHash(); + }; + + std::string ToString() const; + bool Equals(const Expression& other) const; + size_t hash() const; + struct Hash { + size_t operator()(const Expression& expr) const { return expr.hash(); } + }; + + /// Bind this expression to the given input type, looking up Kernels and field types. + /// Some expression simplification may be performed and implicit casts will be inserted. + /// Any state necessary for execution will be initialized and returned. + Result Bind(const ValueDescr& in, ExecContext* = NULLPTR) const; + Result Bind(const Schema& in_schema, ExecContext* = NULLPTR) const; + + // XXX someday + // Clone all KernelState in this bound expression. If any function referenced by this + // expression has mutable KernelState, it is not safe to execute or apply simplification + // passes to it (or copies of it!) from multiple threads. Cloning state produces new + // KernelStates where necessary to ensure that Expressions may be manipulated safely + // on multiple threads. + // Result CloneState() const; + // Status SetState(ExpressionState); + + /// Return true if all an expression's field references have explicit ValueDescr and all + /// of its functions' kernels are looked up. + bool IsBound() const; + + /// Return true if this expression is composed only of Scalar literals, field + /// references, and calls to ScalarFunctions. + bool IsScalarExpression() const; + + /// Return true if this expression is literal and entirely null. 
+ bool IsNullLiteral() const; + + /// Return true if this expression could evaluate to true. + bool IsSatisfiable() const; + + // XXX someday + // Result GetPipelines(); + + /// Access a Call or return nullptr if this expression is not a call + const Call* call() const; + /// Access a Datum or return nullptr if this expression is not a literal + const Datum* literal() const; + /// Access a FieldRef or return nullptr if this expression is not a field_ref + const FieldRef* field_ref() const; + + /// The type and shape to which this expression will evaluate + ValueDescr descr() const; + std::shared_ptr type() const { return descr().type; } + // XXX someday + // NullGeneralization::type nullable() const; + + struct Parameter { + FieldRef ref; + + // post-bind properties + ValueDescr descr; + int index; + }; + const Parameter* parameter() const; + + Expression() = default; + explicit Expression(Call call); + explicit Expression(Datum literal); + explicit Expression(Parameter parameter); + + private: + using Impl = util::Variant; + std::shared_ptr impl_; + + ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r); + + ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*); +}; + +inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); } +inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); } + +// Factories + +ARROW_EXPORT +Expression literal(Datum lit); + +template +Expression literal(Arg&& arg) { + return literal(Datum(std::forward(arg))); +} + +ARROW_EXPORT +Expression field_ref(FieldRef ref); + +ARROW_EXPORT +Expression call(std::string function, std::vector arguments, + std::shared_ptr options = NULLPTR); + +template ::value>::type> +Expression call(std::string function, std::vector arguments, + Options options) { + return call(std::move(function), std::move(arguments), + std::make_shared(std::move(options))); +} + +/// Assemble a list of all fields referenced by an 
Expression at any depth. +ARROW_EXPORT +std::vector FieldsInExpression(const Expression&); + +/// Check if the expression references any fields. +ARROW_EXPORT +bool ExpressionHasFieldRefs(const Expression&); + +/// Assemble a mapping from field references to known values. +struct ARROW_EXPORT KnownFieldValues; +ARROW_EXPORT +Result ExtractKnownFieldValues( + const Expression& guaranteed_true_predicate); + +/// \defgroup expression-passes Functions for modification of Expressions +/// +/// @{ +/// +/// These transform bound expressions. Some transforms utilize a guarantee, which is +/// provided as an Expression which is guaranteed to evaluate to true. The +/// guaranteed_true_predicate need not be bound, but canonicalization is currently +/// deferred to producers of guarantees. For example in order to be recognized as a +/// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS +/// and literal RHS. Flipping the arguments, "is_in" with a one-long value_set, ... or +/// other semantically identical Expressions will not be recognized. + +/// Weak canonicalization which establishes guarantees for subsequent passes. Even +/// equivalent Expressions may result in different canonicalized expressions. +/// TODO this could be a strong canonicalization +ARROW_EXPORT +Result Canonicalize(Expression, ExecContext* = NULLPTR); + +/// Simplify Expressions based on literal arguments (for example, add(null, x) will always +/// be null so replace the call with a null literal). Includes early evaluation of all +/// calls whose arguments are entirely literal. +ARROW_EXPORT +Result FoldConstants(Expression); + +/// Simplify Expressions by replacing with known values of the fields which it references. +ARROW_EXPORT +Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values, + Expression); + +/// Simplify an expression by replacing subexpressions based on a guarantee: +/// a boolean expression which is guaranteed to evaluate to `true`. 
For example, this is +/// used to remove redundant function calls from a filter expression or to replace a +/// reference to a constant-value field with a literal. +ARROW_EXPORT +Result SimplifyWithGuarantee(Expression, + const Expression& guaranteed_true_predicate); + +/// @} + +// Execution + +/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a +/// RecordBatch which may have missing or incorrectly ordered columns. +/// Missing fields will be replaced with null scalars. +ARROW_EXPORT Result MakeExecBatch(const Schema& full_schema, + const Datum& partial); + +/// Execute a scalar expression against the provided state and input ExecBatch. This +/// expression must be bound. +ARROW_EXPORT +Result ExecuteScalarExpression(const Expression&, const ExecBatch& input, + ExecContext* = NULLPTR); + +/// Convenience function for invoking against a RecordBatch +ARROW_EXPORT +Result ExecuteScalarExpression(const Expression&, const Schema& full_schema, + const Datum& partial_input, ExecContext* = NULLPTR); + +// Serialization + +ARROW_EXPORT +Result> Serialize(const Expression&); + +ARROW_EXPORT +Result Deserialize(std::shared_ptr); + +// Convenience aliases for factories + +ARROW_EXPORT Expression project(std::vector values, + std::vector names); + +ARROW_EXPORT Expression equal(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression less(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression greater(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs); + +ARROW_EXPORT Expression is_null(Expression lhs); + +ARROW_EXPORT Expression is_valid(Expression lhs); + +ARROW_EXPORT Expression and_(Expression lhs, Expression rhs); +ARROW_EXPORT Expression and_(const std::vector&); +ARROW_EXPORT Expression or_(Expression lhs, Expression rhs); +ARROW_EXPORT 
Expression or_(const std::vector&); +ARROW_EXPORT Expression not_(Expression operand); + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/dataset/expression_benchmark.cc b/cpp/src/arrow/compute/exec/expression_benchmark.cc similarity index 81% rename from cpp/src/arrow/dataset/expression_benchmark.cc rename to cpp/src/arrow/compute/exec/expression_benchmark.cc index 24870f38c14..1899b7caab6 100644 --- a/cpp/src/arrow/dataset/expression_benchmark.cc +++ b/cpp/src/arrow/compute/exec/expression_benchmark.cc @@ -18,23 +18,16 @@ #include "benchmark/benchmark.h" #include "arrow/compute/cast.h" -#include "arrow/dataset/expression.h" +#include "arrow/compute/exec/expression.h" #include "arrow/dataset/partition.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" namespace arrow { -namespace dataset { +namespace compute { -static Expression GetPartitionExpression(const std::string& path, bool infer_dictionary) { - auto options = HivePartitioningFactoryOptions(); - options.infer_dictionary = infer_dictionary; - auto factory = HivePartitioning::MakeFactory(options); - ASSIGN_OR_ABORT(auto schema, factory->Inspect({path})); - ASSIGN_OR_ABORT(auto partitioning, factory->Finish(schema)); - ASSIGN_OR_ABORT(auto expr, partitioning->Parse(path)); - return expr; -} +std::shared_ptr ninety_nine_dict = + DictionaryScalar::Make(MakeScalar(0), ArrayFromJSON(int64(), "[99]")); // A benchmark of SimplifyWithGuarantee using expressions arising from partitioning. static void SimplifyFilterWithGuarantee(benchmark::State& state, Expression filter, @@ -61,11 +54,15 @@ auto filter_cast_negative = auto filter_cast_positive = and_(equal(call("cast", {field_ref("a")}, to_int64), literal(99)), equal(call("cast", {field_ref("b")}, to_int64), literal(99))); -// A fully simplified partition expression. -auto guarantee = GetPartitionExpression("a=99/b=99", /*infer_dictionary=*/false); -// A partition expression that uses dictionaries, which are inferred by default. 
-auto guarantee_dictionary = - GetPartitionExpression("a=99/b=99", /*infer_dictionary=*/true); + +// An unencoded partition expression for "a=99/b=99". +auto guarantee = and_(equal(field_ref("a"), literal(int64_t(99))), + equal(field_ref("b"), literal(int64_t(99)))); + +// A partition expression for "a=99/b=99" that uses dictionaries (inferred by default). +auto guarantee_dictionary = and_(equal(field_ref("a"), literal(ninety_nine_dict)), + equal(field_ref("b"), literal(ninety_nine_dict))); + // Negative queries (partition expressions that fail the filter) BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee, negative_filter_simple_guarantee_simple, filter_simple_negative, guarantee); @@ -87,5 +84,5 @@ BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee, BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee, positive_filter_cast_guarantee_dictionary, filter_cast_positive, guarantee_dictionary); -} // namespace dataset +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/dataset/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h similarity index 93% rename from cpp/src/arrow/dataset/expression_internal.h rename to cpp/src/arrow/compute/exec/expression_internal.h index 24e60377f5a..dc38924d932 100644 --- a/cpp/src/arrow/dataset/expression_internal.h +++ b/cpp/src/arrow/compute/exec/expression_internal.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/dataset/expression.h" +#include "arrow/compute/exec/expression.h" #include #include @@ -32,7 +32,11 @@ namespace arrow { using internal::checked_cast; -namespace dataset { +namespace compute { + +struct KnownFieldValues { + std::unordered_map map; +}; inline const Expression::Call* CallNotNull(const Expression& expr) { auto call = expr.call(); @@ -216,20 +220,10 @@ inline bool IsSetLookup(const std::string& function) { return function == "is_in" || function == "index_in"; } -inline const compute::SetLookupOptions* GetSetLookupOptions( +inline const compute::MakeStructOptions* GetMakeStructOptions( const Expression::Call& call) { - if (!IsSetLookup(call.function_name)) return nullptr; - return checked_cast(call.options.get()); -} - -inline const compute::ProjectOptions* GetProjectOptions(const Expression::Call& call) { - if (call.function_name != "project") return nullptr; - return checked_cast(call.options.get()); -} - -inline const compute::StrptimeOptions* GetStrptimeOptions(const Expression::Call& call) { - if (call.function_name != "strptime") return nullptr; - return checked_cast(call.options.get()); + if (call.function_name != "make_struct") return nullptr; + return checked_cast(call.options.get()); } /// A helper for unboxing an Expression composed of associative function calls. @@ -338,5 +332,5 @@ Result Modify(Expression expr, const PreVisit& pre, return post_call(std::move(expr), nullptr); } -} // namespace dataset +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/dataset/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc similarity index 84% rename from cpp/src/arrow/dataset/expression_test.cc rename to cpp/src/arrow/compute/exec/expression_test.cc index 2ab796b052f..b59f8762818 100644 --- a/cpp/src/arrow/dataset/expression_test.cc +++ b/cpp/src/arrow/compute/exec/expression_test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/dataset/expression.h" +#include "arrow/compute/exec/expression.h" #include #include @@ -26,9 +26,9 @@ #include #include +#include "arrow/compute/exec/expression_internal.h" +#include "arrow/compute/function_internal.h" #include "arrow/compute/registry.h" -#include "arrow/dataset/expression_internal.h" -#include "arrow/dataset/test_util.h" #include "arrow/testing/gtest_util.h" using testing::HasSubstr; @@ -39,7 +39,24 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; -namespace dataset { +namespace compute { + +const std::shared_ptr kBoringSchema = schema({ + field("bool", boolean()), + field("i8", int8()), + field("i32", int32()), + field("i32_req", int32(), /*nullable=*/false), + field("u32", uint32()), + field("i64", int64()), + field("f32", float32()), + field("f32_req", float32(), /*nullable=*/false), + field("f64", float64()), + field("date64", date64()), + field("str", utf8()), + field("dict_str", dictionary(int32(), utf8())), + field("dict_i32", dictionary(int32(), int32())), + field("ts_ns", timestamp(TimeUnit::NANO)), +}); #define EXPECT_OK ARROW_EXPECT_OK @@ -149,6 +166,56 @@ TEST(ExpressionUtils, StripOrderPreservingCasts) { Expect(cast(field_ref("i32"), uint64()), no_change); } +TEST(ExpressionUtils, MakeExecBatch) { + auto Expect = [](std::shared_ptr partial_batch) { + SCOPED_TRACE(partial_batch->ToString()); + ASSERT_OK_AND_ASSIGN(auto batch, MakeExecBatch(*kBoringSchema, partial_batch)); + + ASSERT_EQ(batch.num_values(), kBoringSchema->num_fields()); + for (int i = 0; i < kBoringSchema->num_fields(); ++i) { + const auto& field = *kBoringSchema->field(i); + + SCOPED_TRACE("Field#" + std::to_string(i) + " " + field.ToString()); + + EXPECT_TRUE(batch[i].type()->Equals(field.type())) + << "Incorrect type " << batch[i].type()->ToString(); + + ASSERT_OK_AND_ASSIGN(auto col, FieldRef(field.name()).GetOneOrNone(*partial_batch)); + + if (batch[i].is_scalar()) { + EXPECT_FALSE(batch[i].scalar()->is_valid) 
+ << "Non-null placeholder scalar was injected"; + + EXPECT_EQ(col, nullptr) + << "Placeholder scalar overwrote column " << col->ToString(); + } else { + AssertDatumsEqual(col, batch[i]); + } + } + }; + + auto GetField = [](std::string name) { return kBoringSchema->GetFieldByName(name); }; + + constexpr int64_t kNumRows = 3; + auto i32 = ArrayFromJSON(int32(), "[1, 2, 3]"); + auto f32 = ArrayFromJSON(float32(), "[1.5, 2.25, 3.125]"); + + // empty + Expect(RecordBatchFromJSON(kBoringSchema, "[]")); + + // subset + Expect(RecordBatch::Make(schema({GetField("i32"), GetField("f32")}), kNumRows, + {i32, f32})); + + // flipped subset + Expect(RecordBatch::Make(schema({GetField("f32"), GetField("i32")}), kNumRows, + {f32, i32})); + + auto duplicated_names = + RecordBatch::Make(schema({GetField("i32"), GetField("i32")}), kNumRows, {i32, i32}); + ASSERT_RAISES(Invalid, MakeExecBatch(*kBoringSchema, duplicated_names)); +} + TEST(Expression, ToString) { EXPECT_EQ(field_ref("alpha").ToString(), "alpha"); @@ -156,6 +223,7 @@ TEST(Expression, ToString) { EXPECT_EQ(literal("a").ToString(), "\"a\""); EXPECT_EQ(literal("a\nb").ToString(), "\"a\\nb\""); EXPECT_EQ(literal(std::make_shared()).ToString(), "null"); + EXPECT_EQ(literal(std::make_shared()).ToString(), "null"); EXPECT_EQ(literal(std::make_shared(Buffer::FromString("az"))).ToString(), "\"617A\""); @@ -167,17 +235,43 @@ TEST(Expression, ToString) { auto in_12 = call("index_in", {field_ref("beta")}, compute::SetLookupOptions{ArrayFromJSON(int32(), "[1,2]")}); - EXPECT_EQ(in_12.ToString(), "index_in(beta, value_set=[\n 1,\n 2\n])"); + EXPECT_EQ(in_12.ToString(), + "index_in(beta, {value_set=int32:[\n 1,\n 2\n], skip_nulls=false})"); EXPECT_EQ(and_(field_ref("a"), field_ref("b")).ToString(), "(a and b)"); EXPECT_EQ(or_(field_ref("a"), field_ref("b")).ToString(), "(a or b)"); EXPECT_EQ(not_(field_ref("a")).ToString(), "invert(a)"); - EXPECT_EQ(cast(field_ref("a"), int32()).ToString(), "cast(a, to_type=int32)"); - 
EXPECT_EQ(cast(field_ref("a"), nullptr).ToString(), - "cast(a, to_type=)"); - - struct WidgetifyOptions : compute::FunctionOptions { + EXPECT_EQ( + cast(field_ref("a"), int32()).ToString(), + "cast(a, {to_type=int32, allow_int_overflow=false, allow_time_truncate=false, " + "allow_time_overflow=false, allow_decimal_truncate=false, " + "allow_float_truncate=false, allow_invalid_utf8=false})"); + EXPECT_EQ( + cast(field_ref("a"), nullptr).ToString(), + "cast(a, {to_type=, allow_int_overflow=false, allow_time_truncate=false, " + "allow_time_overflow=false, allow_decimal_truncate=false, " + "allow_float_truncate=false, allow_invalid_utf8=false})"); + + class WidgetifyOptionsType : public FunctionOptionsType { + public: + static const FunctionOptionsType* GetInstance() { + static std::unique_ptr instance(new WidgetifyOptionsType()); + return instance.get(); + } + const char* type_name() const override { return "widgetify"; } + std::string Stringify(const FunctionOptions& options) const override { + return type_name(); + } + bool Compare(const FunctionOptions& options, + const FunctionOptions& other) const override { + return true; + } + }; + class WidgetifyOptions : public compute::FunctionOptions { + public: + explicit WidgetifyOptions(bool really = true) + : FunctionOptions(WidgetifyOptionsType::GetInstance()), really(really) {} bool really; }; @@ -185,7 +279,7 @@ TEST(Expression, ToString) { EXPECT_EQ(call("widgetify", {}).ToString(), "widgetif)"); EXPECT_EQ( call("widgetify", {literal(1)}, std::make_shared()).ToString(), - "widgetify(1, {NON-REPRESENTABLE OPTIONS})"); + "widgetify(1, widgetify)"); EXPECT_EQ(equal(field_ref("a"), literal(1)).ToString(), "(a == 1)"); EXPECT_EQ(less(field_ref("a"), literal(2)).ToString(), "(a < 2)"); @@ -340,6 +434,28 @@ TEST(Expression, FieldsInExpression) { {"a", "b", "c"}); } +TEST(Expression, ExpressionHasFieldRefs) { + EXPECT_FALSE(ExpressionHasFieldRefs(literal(true))); + + EXPECT_FALSE(ExpressionHasFieldRefs(call("add", 
{literal(1), literal(3)}))); + + EXPECT_TRUE(ExpressionHasFieldRefs(field_ref("a"))); + + EXPECT_TRUE(ExpressionHasFieldRefs(equal(field_ref("a"), literal(1)))); + + EXPECT_TRUE(ExpressionHasFieldRefs(equal(field_ref("a"), field_ref("b")))); + + EXPECT_TRUE(ExpressionHasFieldRefs( + or_(equal(field_ref("a"), literal(1)), equal(field_ref("a"), literal(2))))); + + EXPECT_TRUE(ExpressionHasFieldRefs( + or_(equal(field_ref("a"), literal(1)), equal(field_ref("b"), literal(2))))); + + EXPECT_TRUE(ExpressionHasFieldRefs(or_( + and_(not_(equal(field_ref("a"), literal(1))), equal(field_ref("b"), literal(2))), + not_(less(field_ref("c"), literal(3)))))); +} + TEST(Expression, BindLiteral) { for (Datum dat : { Datum(3), @@ -379,21 +495,18 @@ TEST(Expression, BindFieldRef) { ExpectBindsTo(field_ref("i32"), no_change, &expr); EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); - // if the field is not found, a null scalar will be emitted - ExpectBindsTo(field_ref("no such field"), no_change, &expr); - EXPECT_EQ(expr.descr(), ValueDescr::Scalar(null())); + // if the field is not found, an error will be raised + ASSERT_RAISES(Invalid, field_ref("no such field").Bind(*kBoringSchema)); // referencing a field by name is not supported if that name is not unique // in the input schema ASSERT_RAISES(Invalid, field_ref("alpha").Bind(Schema( {field("alpha", int32()), field("alpha", float32())}))); - // referencing nested fields is supported - ASSERT_OK_AND_ASSIGN(expr, - field_ref(FieldRef("a", "b")) - .Bind(Schema({field("a", struct_({field("b", int32())}))}))); - EXPECT_TRUE(expr.IsBound()); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + // referencing nested fields is not supported + ASSERT_RAISES(NotImplemented, + field_ref(FieldRef("a", "b")) + .Bind(Schema({field("a", struct_({field("b", int32())}))}))); } TEST(Expression, BindCall) { @@ -459,7 +572,8 @@ TEST(Expression, ExecuteFieldRef) { auto expr = field_ref(ref); ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); - 
ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, in)); + ASSERT_OK_AND_ASSIGN(Datum actual, + ExecuteScalarExpression(expr, Schema(in.type()->fields()), in)); AssertDatumsEqual(actual, expected, /*verbose=*/true); }; @@ -471,39 +585,45 @@ TEST(Expression, ExecuteFieldRef) { ])"), ArrayFromJSON(float64(), R"([6.125, 0.0, -1])")); - // more nested: - ExpectRefIs(FieldRef{"a", "a"}, - ArrayFromJSON(struct_({field("a", struct_({field("a", float64())}))}), R"([ - {"a": {"a": 6.125}}, - {"a": {"a": 0.0}}, - {"a": {"a": -1}} + ExpectRefIs("a", + ArrayFromJSON(struct_({ + field("a", float64()), + field("b", float64()), + }), + R"([ + {"a": 6.125, "b": 7.5}, + {"a": 0.0, "b": 2.125}, + {"a": -1, "b": 4.0} ])"), ArrayFromJSON(float64(), R"([6.125, 0.0, -1])")); - // absent fields are resolved as a null scalar: - ExpectRefIs(FieldRef{"b"}, ArrayFromJSON(struct_({field("a", float64())}), R"([ - {"a": 6.125}, - {"a": 0.0}, - {"a": -1} + ExpectRefIs("b", + ArrayFromJSON(struct_({ + field("a", float64()), + field("b", float64()), + }), + R"([ + {"a": 6.125, "b": 7.5}, + {"a": 0.0, "b": 2.125}, + {"a": -1, "b": 4.0} ])"), - MakeNullScalar(null())); - - // XXX this *should* fail in Bind but for now it will just error in - // ExecuteScalarExpression - ASSERT_OK_AND_ASSIGN(auto list_item, field_ref("item").Bind(list(int32()))); - EXPECT_RAISES_WITH_MESSAGE_THAT( - NotImplemented, HasSubstr("non-struct array"), - ExecuteScalarExpression(list_item, - ArrayFromJSON(list(int32()), "[[1,2], [], null, [5]]"))); + ArrayFromJSON(float64(), R"([7.5, 2.125, 4.0])")); } Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& input) { - auto call = expr.call(); - if (call == nullptr) { - // already tested execution of field_ref, execution of literal is trivial - return ExecuteScalarExpression(expr, input); + if (auto lit = expr.literal()) { + return *lit; } + if (auto ref = expr.field_ref()) { + if (input.type()) { + return ref->GetOneOrNone(*input.make_array()); 
+ } + return ref->GetOneOrNone(*input.record_batch()); + } + + auto call = CallNotNull(expr); + std::vector arguments(call->arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) { ARROW_ASSIGN_OR_RAISE(arguments[i], @@ -521,13 +641,16 @@ Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& } void ExpectExecute(Expression expr, Datum in, Datum* actual_out = NULLPTR) { + std::shared_ptr schm; if (in.is_value()) { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); + schm = schema(in.type()->fields()); } else { - ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.record_batch()->schema())); + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.schema())); + schm = in.schema(); } - ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, in)); + ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, *schm, in)); ASSERT_OK_AND_ASSIGN(Datum expected, NaiveExecuteScalarExpression(expr, in)); @@ -587,9 +710,9 @@ TEST(Expression, ExecuteDictionaryTransparent) { ASSERT_OK_AND_ASSIGN( expr, SimplifyWithGuarantee(expr, equal(field_ref("dict_str"), literal("eh")))); - ASSERT_OK_AND_ASSIGN( - auto res, - ExecuteScalarExpression(expr, ArrayFromJSON(struct_({field("i32", int32())}), R"([ + ASSERT_OK_AND_ASSIGN(auto res, ExecuteScalarExpression( + expr, *kBoringSchema, + ArrayFromJSON(struct_({field("i32", int32())}), R"([ {"i32": 0}, {"i32": 1}, {"i32": 2} @@ -707,7 +830,7 @@ TEST(Expression, ExtractKnownFieldValues) { void operator()(Expression guarantee, std::unordered_map expected) { ASSERT_OK_AND_ASSIGN(auto actual, ExtractKnownFieldValues(guarantee)); - EXPECT_THAT(actual, UnorderedElementsAreArray(expected)) + EXPECT_THAT(actual.map, UnorderedElementsAreArray(expected)) << " guarantee: " << guarantee.ToString(); } } ExpectKnown; @@ -759,8 +882,8 @@ TEST(Expression, ReplaceFieldsWithKnownValues) { Expression unbound_expected) { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*kBoringSchema)); ASSERT_OK_AND_ASSIGN(auto expected, 
unbound_expected.Bind(*kBoringSchema)); - ASSERT_OK_AND_ASSIGN(auto replaced, - ReplaceFieldsWithKnownValues(known_values, expr)); + ASSERT_OK_AND_ASSIGN(auto replaced, ReplaceFieldsWithKnownValues( + KnownFieldValues{known_values}, expr)); EXPECT_EQ(replaced, expected); ExpectIdenticalIfUnchanged(replaced, expr); @@ -775,7 +898,7 @@ TEST(Expression, ReplaceFieldsWithKnownValues) { // NB: known_values will be cast ExpectReplacesTo(field_ref("i32"), {{"i32", Datum("3")}}, literal(3)); - ExpectReplacesTo(field_ref("b"), i32_is_3, field_ref("b")); + ExpectReplacesTo(field_ref("f32"), i32_is_3, field_ref("f32")); ExpectReplacesTo(equal(field_ref("i32"), literal(1)), i32_is_3, equal(literal(3), literal(1))); @@ -816,17 +939,16 @@ TEST(Expression, ReplaceFieldsWithKnownValues) { ExpectReplacesTo(is_valid(field_ref("str")), i32_valid_str_null, is_valid(null_literal(utf8()))); - ASSERT_OK_AND_ASSIGN(auto expr, field_ref("dict_str").Bind(*kBoringSchema)); Datum dict_i32{ DictionaryScalar::Make(MakeScalar(0), ArrayFromJSON(int32(), R"([3])"))}; - // Unsupported cast dictionary(int32(), int32()) -> dictionary(int32(), utf8()) - ASSERT_RAISES(NotImplemented, - ReplaceFieldsWithKnownValues({{"dict_str", dict_i32}}, expr)); - // Unsupported cast dictionary(int8(), utf8()) -> dictionary(int32(), utf8()) - dict_str = Datum{ - DictionaryScalar::Make(MakeScalar(0), ArrayFromJSON(utf8(), R"(["a"])"))}; - ASSERT_RAISES(NotImplemented, - ReplaceFieldsWithKnownValues({{"dict_str", dict_str}}, expr)); + // cast dictionary(int32(), int32()) -> dictionary(int32(), utf8()) + ExpectReplacesTo(field_ref("dict_str"), {{"dict_str", dict_i32}}, literal(dict_str)); + + // cast dictionary(int8(), utf8()) -> dictionary(int32(), utf8()) + auto dict_int8_str = Datum{ + DictionaryScalar::Make(MakeScalar(0), ArrayFromJSON(utf8(), R"(["3"])"))}; + ExpectReplacesTo(field_ref("dict_str"), {{"dict_str", dict_int8_str}}, + literal(dict_str)); } struct { @@ -1016,7 +1138,8 @@ TEST(Expression, 
SingleComparisonGuarantees) { {"i32"})); ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema)); - ASSERT_OK_AND_ASSIGN(Datum evaluated, ExecuteScalarExpression(filter, input)); + ASSERT_OK_AND_ASSIGN(Datum evaluated, + ExecuteScalarExpression(filter, *kBoringSchema, input)); // ensure that the simplified filter is as simplified as it could be // (this is always possible for single comparisons) @@ -1127,7 +1250,8 @@ TEST(Expression, Filter) { auto expected_mask = batch->column(0); ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema)); - ASSERT_OK_AND_ASSIGN(Datum mask, ExecuteScalarExpression(filter, batch)); + ASSERT_OK_AND_ASSIGN(Datum mask, + ExecuteScalarExpression(filter, *kBoringSchema, batch)); AssertDatumsEqual(expected_mask, mask); }; @@ -1220,7 +1344,8 @@ TEST(Projection, AugmentWithNull) { auto ExpectProject = [&](Expression proj, Datum expected) { ASSERT_OK_AND_ASSIGN(proj, proj.Bind(*kBoringSchema)); - ASSERT_OK_AND_ASSIGN(auto actual, ExecuteScalarExpression(proj, input)); + ASSERT_OK_AND_ASSIGN(auto actual, + ExecuteScalarExpression(proj, *kBoringSchema, input)); AssertDatumsEqual(Datum(expected), actual); }; @@ -1250,7 +1375,8 @@ TEST(Projection, AugmentWithKnownValues) { Expression guarantee) { ASSERT_OK_AND_ASSIGN(proj, proj.Bind(*kBoringSchema)); ASSERT_OK_AND_ASSIGN(proj, SimplifyWithGuarantee(proj, guarantee)); - ASSERT_OK_AND_ASSIGN(auto actual, ExecuteScalarExpression(proj, input)); + ASSERT_OK_AND_ASSIGN(auto actual, + ExecuteScalarExpression(proj, *kBoringSchema, input)); AssertDatumsEqual(Datum(expected), actual); }; @@ -1278,5 +1404,5 @@ TEST(Projection, AugmentWithKnownValues) { })); } -} // namespace dataset +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/dataset/forest_internal.h b/cpp/src/arrow/compute/exec/forest_internal.h similarity index 96% rename from cpp/src/arrow/dataset/forest_internal.h rename to cpp/src/arrow/compute/exec/forest_internal.h index 1a7b874065e..7b55a0aabf3 100644 --- 
a/cpp/src/arrow/dataset/forest_internal.h +++ b/cpp/src/arrow/compute/exec/forest_internal.h @@ -21,15 +21,16 @@ #include #include -#include "arrow/dataset/visibility.h" +#include "arrow/result.h" +#include "arrow/status.h" namespace arrow { -namespace dataset { +namespace compute { /// A Forest is a view of a sorted range which carries an ancestry relation in addition /// to an ordering relation: each element's descendants appear directly after it. /// This can be used to efficiently skip subtrees when iterating through the range. -class ARROW_DS_EXPORT Forest { +class Forest { public: Forest() = default; @@ -69,7 +70,7 @@ class ARROW_DS_EXPORT Forest { std::equal(it, it + size_, other.descendant_counts_->begin()); } - struct ARROW_DS_EXPORT Ref { + struct Ref { int num_descendants() const { return forest->descendant_counts_->at(i); } bool IsAncestorOf(const Ref& ref) const { @@ -120,5 +121,5 @@ class ARROW_DS_EXPORT Forest { std::shared_ptr> descendant_counts_; }; -} // namespace dataset +} // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_compare.cc b/cpp/src/arrow/compute/exec/key_compare.cc new file mode 100644 index 00000000000..7a5b0be9990 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_compare.cc @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/exec/key_compare.h" + +#include +#include + +#include "arrow/compute/exec/util.h" +#include "arrow/util/ubsan.h" + +namespace arrow { +namespace compute { + +void KeyCompare::CompareRows(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows, + uint16_t* out_sel_left_maybe_same, + const KeyEncoder::KeyRowArray& rows_left, + const KeyEncoder::KeyRowArray& rows_right) { + ARROW_DCHECK(rows_left.metadata().is_compatible(rows_right.metadata())); + + if (num_rows_to_compare == 0) { + *out_num_rows = 0; + return; + } + + // Allocate temporary byte and bit vectors + auto bytevector_holder = + util::TempVectorHolder(ctx->stack, num_rows_to_compare); + auto bitvector_holder = + util::TempVectorHolder(ctx->stack, num_rows_to_compare); + + uint8_t* match_bytevector = bytevector_holder.mutable_data(); + uint8_t* match_bitvector = bitvector_holder.mutable_data(); + + // All comparison functions called here will update match byte vector + // (AND it with comparison result) instead of overwriting it. 
+ memset(match_bytevector, 0xff, num_rows_to_compare); + + if (rows_left.metadata().is_fixed_length) { + CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map, + match_bytevector, ctx, rows_left.metadata().fixed_length, + rows_left.data(1), rows_right.data(1)); + } else { + CompareVaryingLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map, + match_bytevector, ctx, rows_left.data(2), rows_right.data(2), + rows_left.offsets(), rows_right.offsets()); + } + + // CompareFixedLength can be used to compare nulls as well + bool nulls_present = rows_left.has_any_nulls(ctx) || rows_right.has_any_nulls(ctx); + if (nulls_present) { + CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map, + match_bytevector, ctx, + rows_left.metadata().null_masks_bytes_per_row, + rows_left.null_masks(), rows_right.null_masks()); + } + + util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare, match_bytevector, + match_bitvector); + if (sel_left_maybe_null) { + int out_num_rows_int; + util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare, + match_bitvector, sel_left_maybe_null, + &out_num_rows_int, out_sel_left_maybe_same); + *out_num_rows = out_num_rows_int; + } else { + int out_num_rows_int; + util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare, + match_bitvector, &out_num_rows_int, + out_sel_left_maybe_same); + *out_num_rows = out_num_rows_int; + } +} + +void KeyCompare::CompareFixedLength(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, + KeyEncoder::KeyEncoderContext* ctx, + uint32_t fixed_length, const uint8_t* rows_left, + const uint8_t* rows_right) { + bool use_selection = (sel_left_maybe_null != nullptr); + + uint32_t num_rows_already_processed = 0; + +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2() && !use_selection) { + // Choose between up-to-8B length, up-to-16B length 
and any size versions + if (fixed_length <= 8) { + num_rows_already_processed = CompareFixedLength_UpTo8B_avx2( + num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length, + rows_left, rows_right); + } else if (fixed_length <= 16) { + num_rows_already_processed = CompareFixedLength_UpTo16B_avx2( + num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length, + rows_left, rows_right); + } else { + num_rows_already_processed = + CompareFixedLength_avx2(num_rows_to_compare, left_to_right_map, + match_bytevector, fixed_length, rows_left, rows_right); + } + } +#endif + + typedef void (*CompareFixedLengthImp_t)(uint32_t, uint32_t, const uint16_t*, + const uint32_t*, uint8_t*, uint32_t, + const uint8_t*, const uint8_t*); + static const CompareFixedLengthImp_t CompareFixedLengthImp_fn[] = { + CompareFixedLengthImp, CompareFixedLengthImp, + CompareFixedLengthImp, CompareFixedLengthImp, + CompareFixedLengthImp, CompareFixedLengthImp}; + int dispatch_const = (use_selection ? 3 : 0) + + ((fixed_length <= 8) ? 0 : ((fixed_length <= 16) ? 
1 : 2)); + CompareFixedLengthImp_fn[dispatch_const]( + num_rows_already_processed, num_rows_to_compare, sel_left_maybe_null, + left_to_right_map, match_bytevector, fixed_length, rows_left, rows_right); +} + +template +void KeyCompare::CompareFixedLengthImp(uint32_t num_rows_already_processed, + uint32_t num_rows, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, uint32_t length, + const uint8_t* rows_left, + const uint8_t* rows_right) { + // Key length (for encoded key) has to be non-zero + ARROW_DCHECK(length > 0); + + // Non-zero length guarantees no underflow + int32_t num_loops_less_one = (static_cast(length) + 7) / 8 - 1; + + // Length remaining in last loop can only be zero for input length equal to zero + uint32_t length_remaining_last_loop = length - num_loops_less_one * 8; + uint64_t tail_mask = (~0ULL) >> (8 * (8 - length_remaining_last_loop)); + + for (uint32_t id_input = num_rows_already_processed; id_input < num_rows; ++id_input) { + uint32_t irow_left = use_selection ? sel_left_maybe_null[id_input] : id_input; + uint32_t irow_right = left_to_right_map[irow_left]; + uint32_t begin_left = length * irow_left; + uint32_t begin_right = length * irow_right; + const uint64_t* key_left_ptr = + reinterpret_cast(rows_left + begin_left); + const uint64_t* key_right_ptr = + reinterpret_cast(rows_right + begin_right); + uint64_t result_or = 0ULL; + int32_t istripe = 0; + + // Specializations for keys up to 8 bytes and between 9 and 16 bytes to + // avoid internal loop over words in the value for short ones. + // + // Template argument 0 means arbitrarily many 64-bit words, + // 1 means up to 1 and 2 means up to 2. 
+ // + if (num_64bit_words == 0) { + for (; istripe < num_loops_less_one; ++istripe) { + uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]); + uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]); + result_or |= (key_left ^ key_right); + } + } else if (num_64bit_words == 2) { + uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]); + uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]); + result_or |= (key_left ^ key_right); + ++istripe; + } + + uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]); + uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]); + result_or |= (tail_mask & (key_left ^ key_right)); + + int result = (result_or == 0 ? 0xff : 0); + match_bytevector[id_input] &= result; + } +} + +void KeyCompare::CompareVaryingLength(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, + KeyEncoder::KeyEncoderContext* ctx, + const uint8_t* rows_left, const uint8_t* rows_right, + const uint32_t* offsets_left, + const uint32_t* offsets_right) { + bool use_selection = (sel_left_maybe_null != nullptr); + +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2() && !use_selection) { + CompareVaryingLength_avx2(num_rows_to_compare, left_to_right_map, match_bytevector, + rows_left, rows_right, offsets_left, offsets_right); + } else { +#endif + if (use_selection) { + CompareVaryingLengthImp(num_rows_to_compare, sel_left_maybe_null, + left_to_right_map, match_bytevector, rows_left, + rows_right, offsets_left, offsets_right); + } else { + CompareVaryingLengthImp(num_rows_to_compare, sel_left_maybe_null, + left_to_right_map, match_bytevector, rows_left, + rows_right, offsets_left, offsets_right); + } +#if defined(ARROW_HAVE_AVX2) + } +#endif +} + +template +void KeyCompare::CompareVaryingLengthImp( + uint32_t num_rows, const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, uint8_t* match_bytevector, + const uint8_t* rows_left, const 
uint8_t* rows_right, const uint32_t* offsets_left, + const uint32_t* offsets_right) { + static const uint64_t tail_masks[] = { + 0x0000000000000000ULL, 0x00000000000000ffULL, 0x000000000000ffffULL, + 0x0000000000ffffffULL, 0x00000000ffffffffULL, 0x000000ffffffffffULL, + 0x0000ffffffffffffULL, 0x00ffffffffffffffULL, 0xffffffffffffffffULL}; + for (uint32_t i = 0; i < num_rows; ++i) { + uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; + uint32_t irow_right = left_to_right_map[irow_left]; + uint32_t begin_left = offsets_left[irow_left]; + uint32_t begin_right = offsets_right[irow_right]; + uint32_t length_left = offsets_left[irow_left + 1] - begin_left; + uint32_t length_right = offsets_right[irow_right + 1] - begin_right; + uint32_t length = std::min(length_left, length_right); + const uint64_t* key_left_ptr = + reinterpret_cast(rows_left + begin_left); + const uint64_t* key_right_ptr = + reinterpret_cast(rows_right + begin_right); + uint64_t result_or = 0; + int32_t istripe; + // length can be zero + for (istripe = 0; istripe < (static_cast(length) + 7) / 8 - 1; ++istripe) { + uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]); + uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]); + result_or |= (key_left ^ key_right); + } + + uint32_t length_remaining = length - static_cast(istripe) * 8; + uint64_t tail_mask = tail_masks[length_remaining]; + + uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]); + uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]); + result_or |= (tail_mask & (key_left ^ key_right)); + + int result = (result_or == 0 ? 
0xff : 0); + match_bytevector[i] &= result; + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_compare.h b/cpp/src/arrow/compute/exec/key_compare.h new file mode 100644 index 00000000000..1dffabb884b --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_compare.h @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "arrow/compute/exec/key_encode.h" +#include "arrow/compute/exec/util.h" +#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" + +namespace arrow { +namespace compute { + +class KeyCompare { + public: + // Returns a single 16-bit selection vector of rows that failed comparison. + // If there is input selection on the left, the resulting selection is a filtered image + // of input selection. 
+ static void CompareRows(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows, + uint16_t* out_sel_left_maybe_same, + const KeyEncoder::KeyRowArray& rows_left, + const KeyEncoder::KeyRowArray& rows_right); + + private: + static void CompareFixedLength(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, + KeyEncoder::KeyEncoderContext* ctx, + uint32_t fixed_length, const uint8_t* rows_left, + const uint8_t* rows_right); + static void CompareVaryingLength(uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, + KeyEncoder::KeyEncoderContext* ctx, + const uint8_t* rows_left, const uint8_t* rows_right, + const uint32_t* offsets_left, + const uint32_t* offsets_right); + + // Second template argument is 0, 1 or 2. + // 0 means arbitrarily many 64-bit words, 1 means up to 1 and 2 means up to 2. 
+ template + static void CompareFixedLengthImp(uint32_t num_rows_already_processed, + uint32_t num_rows, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, uint32_t length, + const uint8_t* rows_left, const uint8_t* rows_right); + template + static void CompareVaryingLengthImp(uint32_t num_rows, + const uint16_t* sel_left_maybe_null, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, const uint8_t* rows_left, + const uint8_t* rows_right, + const uint32_t* offsets_left, + const uint32_t* offsets_right); + +#if defined(ARROW_HAVE_AVX2) + + static uint32_t CompareFixedLength_UpTo8B_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right); + static uint32_t CompareFixedLength_UpTo16B_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right); + static uint32_t CompareFixedLength_avx2(uint32_t num_rows, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, uint32_t length, + const uint8_t* rows_left, + const uint8_t* rows_right); + static void CompareVaryingLength_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left, + const uint32_t* offsets_right); + +#endif +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_compare_avx2.cc b/cpp/src/arrow/compute/exec/key_compare_avx2.cc new file mode 100644 index 00000000000..6abdf6c3c3a --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_compare_avx2.cc @@ -0,0 +1,188 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/compute/exec/key_compare.h" +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace compute { + +#if defined(ARROW_HAVE_AVX2) + +uint32_t KeyCompare::CompareFixedLength_UpTo8B_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right) { + ARROW_DCHECK(length <= 8); + __m256i offset_left = _mm256_setr_epi64x(0, length, length * 2, length * 3); + __m256i offset_left_incr = _mm256_set1_epi64x(length * 4); + __m256i mask = _mm256_set1_epi64x(~0ULL >> (8 * (8 - length))); + + constexpr uint32_t unroll = 4; + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + auto key_left = _mm256_i64gather_epi64( + reinterpret_cast(rows_left), offset_left, 1); + offset_left = _mm256_add_epi64(offset_left, offset_left_incr); + __m128i offset_right = + _mm_loadu_si128(reinterpret_cast(left_to_right_map) + i); + offset_right = _mm_mullo_epi32(offset_right, _mm_set1_epi32(length)); + + auto key_right = _mm256_i32gather_epi64( + reinterpret_cast(rows_right), offset_right, 1); + uint32_t cmp = _mm256_movemask_epi8(_mm256_cmpeq_epi64( + _mm256_and_si256(key_left, mask), _mm256_and_si256(key_right, mask))); + reinterpret_cast(match_bytevector)[i] &= cmp; + } + + uint32_t num_rows_processed = num_rows - (num_rows % unroll); + return num_rows_processed; +} + +uint32_t 
KeyCompare::CompareFixedLength_UpTo16B_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right) { + ARROW_DCHECK(length <= 16); + + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + + __m256i mask = + _mm256_cmpgt_epi8(_mm256_set1_epi8(length), + _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15, + kByteSequence0To7, kByteSequence8To15)); + const uint8_t* key_left_ptr = rows_left; + + constexpr uint32_t unroll = 2; + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + auto key_left = _mm256_inserti128_si256( + _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(key_left_ptr))), + _mm_loadu_si128(reinterpret_cast(key_left_ptr + length)), 1); + key_left_ptr += length * 2; + auto key_right = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_loadu_si128(reinterpret_cast( + rows_right + length * left_to_right_map[2 * i]))), + _mm_loadu_si128(reinterpret_cast( + rows_right + length * left_to_right_map[2 * i + 1])), + 1); + __m256i cmp = _mm256_cmpeq_epi64(_mm256_and_si256(key_left, mask), + _mm256_and_si256(key_right, mask)); + cmp = _mm256_and_si256(cmp, _mm256_shuffle_epi32(cmp, 0xee)); // 0b11101110 + cmp = _mm256_permute4x64_epi64(cmp, 0x08); // 0b00001000 + reinterpret_cast(match_bytevector)[i] &= + (_mm256_movemask_epi8(cmp) & 0xffff); + } + + uint32_t num_rows_processed = num_rows - (num_rows % unroll); + return num_rows_processed; +} + +uint32_t KeyCompare::CompareFixedLength_avx2(uint32_t num_rows, + const uint32_t* left_to_right_map, + uint8_t* match_bytevector, uint32_t length, + const uint8_t* rows_left, + const uint8_t* rows_right) { + ARROW_DCHECK(length > 0); + + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL; + 
constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL; + + // Non-zero length guarantees no underflow + int32_t num_loops_less_one = (static_cast(length) + 31) / 32 - 1; + + __m256i tail_mask = + _mm256_cmpgt_epi8(_mm256_set1_epi8(length - num_loops_less_one * 32), + _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15, + kByteSequence16To23, kByteSequence24To31)); + + for (uint32_t irow_left = 0; irow_left < num_rows; ++irow_left) { + uint32_t irow_right = left_to_right_map[irow_left]; + uint32_t begin_left = length * irow_left; + uint32_t begin_right = length * irow_right; + const __m256i* key_left_ptr = + reinterpret_cast(rows_left + begin_left); + const __m256i* key_right_ptr = + reinterpret_cast(rows_right + begin_right); + __m256i result_or = _mm256_setzero_si256(); + int32_t i; + // length cannot be zero + for (i = 0; i < num_loops_less_one; ++i) { + __m256i key_left = _mm256_loadu_si256(key_left_ptr + i); + __m256i key_right = _mm256_loadu_si256(key_right_ptr + i); + result_or = _mm256_or_si256(result_or, _mm256_xor_si256(key_left, key_right)); + } + + __m256i key_left = _mm256_loadu_si256(key_left_ptr + i); + __m256i key_right = _mm256_loadu_si256(key_right_ptr + i); + result_or = _mm256_or_si256( + result_or, _mm256_and_si256(tail_mask, _mm256_xor_si256(key_left, key_right))); + int result = _mm256_testz_si256(result_or, result_or) * 0xff; + match_bytevector[irow_left] &= result; + } + + uint32_t num_rows_processed = num_rows; + return num_rows_processed; +} + +void KeyCompare::CompareVaryingLength_avx2( + uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector, + const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left, + const uint32_t* offsets_right) { + for (uint32_t irow_left = 0; irow_left < num_rows; ++irow_left) { + uint32_t irow_right = left_to_right_map[irow_left]; + uint32_t begin_left = offsets_left[irow_left]; + uint32_t begin_right = offsets_right[irow_right]; + uint32_t 
length_left = offsets_left[irow_left + 1] - begin_left; + uint32_t length_right = offsets_right[irow_right + 1] - begin_right; + uint32_t length = std::min(length_left, length_right); + auto key_left_ptr = reinterpret_cast(rows_left + begin_left); + auto key_right_ptr = reinterpret_cast(rows_right + begin_right); + __m256i result_or = _mm256_setzero_si256(); + int32_t i; + // length can be zero + for (i = 0; i < (static_cast(length) + 31) / 32 - 1; ++i) { + __m256i key_left = _mm256_loadu_si256(key_left_ptr + i); + __m256i key_right = _mm256_loadu_si256(key_right_ptr + i); + result_or = _mm256_or_si256(result_or, _mm256_xor_si256(key_left, key_right)); + } + + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL; + constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL; + + __m256i tail_mask = + _mm256_cmpgt_epi8(_mm256_set1_epi8(length - i * 32), + _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15, + kByteSequence16To23, kByteSequence24To31)); + + __m256i key_left = _mm256_loadu_si256(key_left_ptr + i); + __m256i key_right = _mm256_loadu_si256(key_right_ptr + i); + result_or = _mm256_or_si256( + result_or, _mm256_and_si256(tail_mask, _mm256_xor_si256(key_left, key_right))); + int result = _mm256_testz_si256(result_or, result_or) * 0xff; + match_bytevector[irow_left] &= result; + } +} + +#endif + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc new file mode 100644 index 00000000000..de79558f2c2 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_encode.cc @@ -0,0 +1,1649 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/exec/key_encode.h" + +#include + +#include + +#include "arrow/compute/exec/util.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/ubsan.h" + +namespace arrow { +namespace compute { + +KeyEncoder::KeyRowArray::KeyRowArray() + : pool_(nullptr), rows_capacity_(0), bytes_capacity_(0) {} + +Status KeyEncoder::KeyRowArray::Init(MemoryPool* pool, const KeyRowMetadata& metadata) { + pool_ = pool; + metadata_ = metadata; + + DCHECK(!null_masks_ && !offsets_ && !rows_); + + constexpr int64_t rows_capacity = 8; + constexpr int64_t bytes_capacity = 1024; + + // Null masks + ARROW_ASSIGN_OR_RAISE(auto null_masks, + AllocateResizableBuffer(size_null_masks(rows_capacity), pool_)); + null_masks_ = std::move(null_masks); + memset(null_masks_->mutable_data(), 0, size_null_masks(rows_capacity)); + + // Offsets and rows + if (!metadata.is_fixed_length) { + ARROW_ASSIGN_OR_RAISE(auto offsets, + AllocateResizableBuffer(size_offsets(rows_capacity), pool_)); + offsets_ = std::move(offsets); + memset(offsets_->mutable_data(), 0, size_offsets(rows_capacity)); + reinterpret_cast(offsets_->mutable_data())[0] = 0; + + ARROW_ASSIGN_OR_RAISE( + auto rows, + AllocateResizableBuffer(size_rows_varying_length(bytes_capacity), pool_)); + rows_ = std::move(rows); + memset(rows_->mutable_data(), 0, size_rows_varying_length(bytes_capacity)); + bytes_capacity_ = 
size_rows_varying_length(bytes_capacity) - padding_for_vectors; + } else { + ARROW_ASSIGN_OR_RAISE( + auto rows, AllocateResizableBuffer(size_rows_fixed_length(rows_capacity), pool_)); + rows_ = std::move(rows); + memset(rows_->mutable_data(), 0, size_rows_fixed_length(rows_capacity)); + bytes_capacity_ = size_rows_fixed_length(rows_capacity) - padding_for_vectors; + } + + update_buffer_pointers(); + + rows_capacity_ = rows_capacity; + + num_rows_ = 0; + num_rows_for_has_any_nulls_ = 0; + has_any_nulls_ = false; + + return Status::OK(); +} + +void KeyEncoder::KeyRowArray::Clean() { + num_rows_ = 0; + num_rows_for_has_any_nulls_ = 0; + has_any_nulls_ = false; + + if (!metadata_.is_fixed_length) { + reinterpret_cast(offsets_->mutable_data())[0] = 0; + } +} + +int64_t KeyEncoder::KeyRowArray::size_null_masks(int64_t num_rows) { + return num_rows * metadata_.null_masks_bytes_per_row + padding_for_vectors; +} + +int64_t KeyEncoder::KeyRowArray::size_offsets(int64_t num_rows) { + return (num_rows + 1) * sizeof(uint32_t) + padding_for_vectors; +} + +int64_t KeyEncoder::KeyRowArray::size_rows_fixed_length(int64_t num_rows) { + return num_rows * metadata_.fixed_length + padding_for_vectors; +} + +int64_t KeyEncoder::KeyRowArray::size_rows_varying_length(int64_t num_bytes) { + return num_bytes + padding_for_vectors; +} + +void KeyEncoder::KeyRowArray::update_buffer_pointers() { + buffers_[0] = mutable_buffers_[0] = null_masks_->mutable_data(); + if (metadata_.is_fixed_length) { + buffers_[1] = mutable_buffers_[1] = rows_->mutable_data(); + buffers_[2] = mutable_buffers_[2] = nullptr; + } else { + buffers_[1] = mutable_buffers_[1] = offsets_->mutable_data(); + buffers_[2] = mutable_buffers_[2] = rows_->mutable_data(); + } +} + +Status KeyEncoder::KeyRowArray::ResizeFixedLengthBuffers(int64_t num_extra_rows) { + if (rows_capacity_ >= num_rows_ + num_extra_rows) { + return Status::OK(); + } + + int64_t rows_capacity_new = std::max(static_cast(1), 2 * rows_capacity_); + while 
(rows_capacity_new < num_rows_ + num_extra_rows) { + rows_capacity_new *= 2; + } + + // Null masks + RETURN_NOT_OK(null_masks_->Resize(size_null_masks(rows_capacity_new), false)); + memset(null_masks_->mutable_data() + size_null_masks(rows_capacity_), 0, + size_null_masks(rows_capacity_new) - size_null_masks(rows_capacity_)); + + // Either offsets or rows + if (!metadata_.is_fixed_length) { + RETURN_NOT_OK(offsets_->Resize(size_offsets(rows_capacity_new), false)); + memset(offsets_->mutable_data() + size_offsets(rows_capacity_), 0, + size_offsets(rows_capacity_new) - size_offsets(rows_capacity_)); + } else { + RETURN_NOT_OK(rows_->Resize(size_rows_fixed_length(rows_capacity_new), false)); + memset(rows_->mutable_data() + size_rows_fixed_length(rows_capacity_), 0, + size_rows_fixed_length(rows_capacity_new) - + size_rows_fixed_length(rows_capacity_)); + bytes_capacity_ = size_rows_fixed_length(rows_capacity_new) - padding_for_vectors; + } + + update_buffer_pointers(); + + rows_capacity_ = rows_capacity_new; + + return Status::OK(); +} + +Status KeyEncoder::KeyRowArray::ResizeOptionalVaryingLengthBuffer( + int64_t num_extra_bytes) { + int64_t num_bytes = offsets()[num_rows_]; + if (bytes_capacity_ >= num_bytes + num_extra_bytes || metadata_.is_fixed_length) { + return Status::OK(); + } + + int64_t bytes_capacity_new = std::max(static_cast(1), 2 * bytes_capacity_); + while (bytes_capacity_new < num_bytes + num_extra_bytes) { + bytes_capacity_new *= 2; + } + + RETURN_NOT_OK(rows_->Resize(size_rows_varying_length(bytes_capacity_new), false)); + memset(rows_->mutable_data() + size_rows_varying_length(bytes_capacity_), 0, + size_rows_varying_length(bytes_capacity_new) - + size_rows_varying_length(bytes_capacity_)); + + update_buffer_pointers(); + + bytes_capacity_ = bytes_capacity_new; + + return Status::OK(); +} + +Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from, + uint32_t num_rows_to_append, + const uint16_t* source_row_ids) { + 
DCHECK(metadata_.is_compatible(from.metadata())); + + RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append)); + + if (!metadata_.is_fixed_length) { + // Varying-length rows + auto from_offsets = reinterpret_cast(from.offsets_->data()); + auto to_offsets = reinterpret_cast(offsets_->mutable_data()); + uint32_t total_length = to_offsets[num_rows_]; + uint32_t total_length_to_append = 0; + for (uint32_t i = 0; i < num_rows_to_append; ++i) { + uint16_t row_id = source_row_ids[i]; + uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id]; + total_length_to_append += length; + to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append; + } + + RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append)); + + const uint8_t* src = from.rows_->data(); + uint8_t* dst = rows_->mutable_data() + total_length; + for (uint32_t i = 0; i < num_rows_to_append; ++i) { + uint16_t row_id = source_row_ids[i]; + uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id]; + auto src64 = reinterpret_cast(src + from_offsets[row_id]); + auto dst64 = reinterpret_cast(dst); + for (uint32_t j = 0; j < (length + 7) / 8; ++j) { + dst64[j] = src64[j]; + } + dst += length; + } + } else { + // Fixed-length rows + const uint8_t* src = from.rows_->data(); + uint8_t* dst = rows_->mutable_data() + num_rows_ * metadata_.fixed_length; + for (uint32_t i = 0; i < num_rows_to_append; ++i) { + uint16_t row_id = source_row_ids[i]; + uint32_t length = metadata_.fixed_length; + auto src64 = reinterpret_cast(src + length * row_id); + auto dst64 = reinterpret_cast(dst); + for (uint32_t j = 0; j < (length + 7) / 8; ++j) { + dst64[j] = src64[j]; + } + dst += length; + } + } + + // Null masks + uint32_t byte_length = metadata_.null_masks_bytes_per_row; + uint64_t dst_byte_offset = num_rows_ * byte_length; + const uint8_t* src_base = from.null_masks_->data(); + uint8_t* dst_base = null_masks_->mutable_data(); + for (uint32_t i = 0; i < num_rows_to_append; ++i) { + uint32_t 
row_id = source_row_ids[i]; + int64_t src_byte_offset = row_id * byte_length; + const uint8_t* src = src_base + src_byte_offset; + uint8_t* dst = dst_base + dst_byte_offset; + for (uint32_t ibyte = 0; ibyte < byte_length; ++ibyte) { + dst[ibyte] = src[ibyte]; + } + dst_byte_offset += byte_length; + } + + num_rows_ += num_rows_to_append; + + return Status::OK(); +} + +Status KeyEncoder::KeyRowArray::AppendEmpty(uint32_t num_rows_to_append, + uint32_t num_extra_bytes_to_append) { + RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append)); + RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append)); + num_rows_ += num_rows_to_append; + if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) { + memset(rows_->mutable_data(), 0, bytes_capacity_); + } + return Status::OK(); +} + +bool KeyEncoder::KeyRowArray::has_any_nulls(const KeyEncoderContext* ctx) const { + if (has_any_nulls_) { + return true; + } + if (num_rows_for_has_any_nulls_ < num_rows_) { + auto size_per_row = metadata().null_masks_bytes_per_row; + has_any_nulls_ = !util::BitUtil::are_all_bytes_zero( + ctx->hardware_flags, null_masks() + size_per_row * num_rows_for_has_any_nulls_, + static_cast(size_per_row * (num_rows_ - num_rows_for_has_any_nulls_))); + num_rows_for_has_any_nulls_ = num_rows_; + } + return has_any_nulls_; +} + +KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata, + const KeyColumnArray& left, + const KeyColumnArray& right, + int buffer_id_to_replace) { + metadata_ = metadata; + length_ = left.length(); + for (int i = 0; i < max_buffers_; ++i) { + buffers_[i] = left.buffers_[i]; + mutable_buffers_[i] = left.mutable_buffers_[i]; + } + buffers_[buffer_id_to_replace] = right.buffers_[buffer_id_to_replace]; + mutable_buffers_[buffer_id_to_replace] = right.mutable_buffers_[buffer_id_to_replace]; + bit_offset_[0] = left.bit_offset_[0]; + bit_offset_[1] = left.bit_offset_[1]; + if (buffer_id_to_replace < max_buffers_ - 1) { + 
bit_offset_[buffer_id_to_replace] = right.bit_offset_[buffer_id_to_replace]; + } +} + +KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata, + int64_t length, const uint8_t* buffer0, + const uint8_t* buffer1, const uint8_t* buffer2, + int bit_offset0, int bit_offset1) { + metadata_ = metadata; + length_ = length; + buffers_[0] = buffer0; + buffers_[1] = buffer1; + buffers_[2] = buffer2; + mutable_buffers_[0] = mutable_buffers_[1] = mutable_buffers_[2] = nullptr; + bit_offset_[0] = bit_offset0; + bit_offset_[1] = bit_offset1; +} + +KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata, + int64_t length, uint8_t* buffer0, + uint8_t* buffer1, uint8_t* buffer2, + int bit_offset0, int bit_offset1) { + metadata_ = metadata; + length_ = length; + buffers_[0] = mutable_buffers_[0] = buffer0; + buffers_[1] = mutable_buffers_[1] = buffer1; + buffers_[2] = mutable_buffers_[2] = buffer2; + bit_offset_[0] = bit_offset0; + bit_offset_[1] = bit_offset1; +} + +KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t start, + int64_t length) { + metadata_ = from.metadata_; + length_ = length; + uint32_t fixed_size = + !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length; + + buffers_[0] = + from.buffers_[0] ? from.buffers_[0] + (from.bit_offset_[0] + start) / 8 : nullptr; + mutable_buffers_[0] = from.mutable_buffers_[0] + ? from.mutable_buffers_[0] + (from.bit_offset_[0] + start) / 8 + : nullptr; + bit_offset_[0] = (from.bit_offset_[0] + start) % 8; + + if (fixed_size == 0) { + buffers_[1] = + from.buffers_[1] ? from.buffers_[1] + (from.bit_offset_[1] + start) / 8 : nullptr; + mutable_buffers_[1] = from.mutable_buffers_[1] ? from.mutable_buffers_[1] + + (from.bit_offset_[1] + start) / 8 + : nullptr; + bit_offset_[1] = (from.bit_offset_[1] + start) % 8; + } else { + buffers_[1] = from.buffers_[1] ? 
from.buffers_[1] + start * fixed_size : nullptr; + mutable_buffers_[1] = from.mutable_buffers_[1] + ? from.mutable_buffers_[1] + start * fixed_size + : nullptr; + bit_offset_[1] = 0; + } + + buffers_[2] = from.buffers_[2]; + mutable_buffers_[2] = from.mutable_buffers_[2]; +} + +KeyEncoder::KeyColumnArray KeyEncoder::TransformBoolean::ArrayReplace( + const KeyColumnArray& column, const KeyColumnArray& temp) { + // Make sure that the temp buffer is large enough + DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length && + temp.metadata().fixed_length >= sizeof(uint8_t)); + KeyColumnMetadata metadata; + metadata.is_fixed_length = true; + metadata.fixed_length = sizeof(uint8_t); + constexpr int buffer_index = 1; + KeyColumnArray result = KeyColumnArray(metadata, column, temp, buffer_index); + return result; +} + +void KeyEncoder::TransformBoolean::PreEncode(const KeyColumnArray& input, + KeyColumnArray* output, + KeyEncoderContext* ctx) { + // Make sure that metadata and lengths are compatible. + DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length); + DCHECK(output->metadata().fixed_length == 1 && input.metadata().fixed_length == 0); + DCHECK(output->length() == input.length()); + constexpr int buffer_index = 1; + DCHECK(input.data(buffer_index) != nullptr); + DCHECK(output->mutable_data(buffer_index) != nullptr); + util::BitUtil::bits_to_bytes( + ctx->hardware_flags, static_cast(input.length()), input.data(buffer_index), + output->mutable_data(buffer_index), input.bit_offset(buffer_index)); +} + +void KeyEncoder::TransformBoolean::PostDecode(const KeyColumnArray& input, + KeyColumnArray* output, + KeyEncoderContext* ctx) { + // Make sure that metadata and lengths are compatible. 
+ DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length); + DCHECK(output->metadata().fixed_length == 0 && input.metadata().fixed_length == 1); + DCHECK(output->length() == input.length()); + constexpr int buffer_index = 1; + DCHECK(input.data(buffer_index) != nullptr); + DCHECK(output->mutable_data(buffer_index) != nullptr); + + util::BitUtil::bytes_to_bits( + ctx->hardware_flags, static_cast(input.length()), input.data(buffer_index), + output->mutable_data(buffer_index), output->bit_offset(buffer_index)); +} + +bool KeyEncoder::EncoderInteger::IsBoolean(const KeyColumnMetadata& metadata) { + return metadata.is_fixed_length && metadata.fixed_length == 0; +} + +bool KeyEncoder::EncoderInteger::UsesTransform(const KeyColumnArray& column) { + return IsBoolean(column.metadata()); +} + +KeyEncoder::KeyColumnArray KeyEncoder::EncoderInteger::ArrayReplace( + const KeyColumnArray& column, const KeyColumnArray& temp) { + if (IsBoolean(column.metadata())) { + return TransformBoolean::ArrayReplace(column, temp); + } + return column; +} + +void KeyEncoder::EncoderInteger::PreEncode(const KeyColumnArray& input, + KeyColumnArray* output, + KeyEncoderContext* ctx) { + if (IsBoolean(input.metadata())) { + TransformBoolean::PreEncode(input, output, ctx); + } +} + +void KeyEncoder::EncoderInteger::PostDecode(const KeyColumnArray& input, + KeyColumnArray* output, + KeyEncoderContext* ctx) { + if (IsBoolean(output->metadata())) { + TransformBoolean::PostDecode(input, output, ctx); + } +} + +void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp) { + KeyColumnArray col_prep; + if (UsesTransform(col)) { + col_prep = ArrayReplace(col, *temp); + PreEncode(col, &col_prep, ctx); + } else { + col_prep = col; + } + + const auto num_rows = static_cast(col.length()); + + // When we have a single fixed length column we can just do memcpy + if 
(rows->metadata().is_fixed_length && + rows->metadata().fixed_length == col.metadata().fixed_length) { + DCHECK_EQ(offset_within_row, 0); + uint32_t row_size = col.metadata().fixed_length; + memcpy(rows->mutable_data(1), col.data(1), num_rows * row_size); + } else if (rows->metadata().is_fixed_length) { + uint32_t row_size = rows->metadata().fixed_length; + uint8_t* row_base = rows->mutable_data(1) + offset_within_row; + const uint8_t* col_base = col_prep.data(1); + switch (col_prep.metadata().fixed_length) { + case 1: + for (uint32_t i = 0; i < num_rows; ++i) { + row_base[i * row_size] = col_base[i]; + } + break; + case 2: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + i * row_size) = + reinterpret_cast(col_base)[i]; + } + break; + case 4: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + i * row_size) = + reinterpret_cast(col_base)[i]; + } + break; + case 8: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + i * row_size) = + reinterpret_cast(col_base)[i]; + } + break; + default: + DCHECK(false); + } + } else { + const uint32_t* row_offsets = rows->offsets(); + uint8_t* row_base = rows->mutable_data(2) + offset_within_row; + const uint8_t* col_base = col_prep.data(1); + switch (col_prep.metadata().fixed_length) { + case 1: + for (uint32_t i = 0; i < num_rows; ++i) { + row_base[row_offsets[i]] = col_base[i]; + } + break; + case 2: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + row_offsets[i]) = + reinterpret_cast(col_base)[i]; + } + break; + case 4: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + row_offsets[i]) = + reinterpret_cast(col_base)[i]; + } + break; + case 8: + for (uint32_t i = 0; i < num_rows; ++i) { + *reinterpret_cast(row_base + row_offsets[i]) = + reinterpret_cast(col_base)[i]; + } + break; + default: + DCHECK(false); + } + } +} + +void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows, + uint32_t 
offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx, KeyColumnArray* temp) { + KeyColumnArray col_prep; + if (UsesTransform(*col)) { + col_prep = ArrayReplace(*col, *temp); + } else { + col_prep = *col; + } + + // When we have a single fixed length column we can just do memcpy + if (rows.metadata().is_fixed_length && + col_prep.metadata().fixed_length == rows.metadata().fixed_length) { + DCHECK_EQ(offset_within_row, 0); + uint32_t row_size = rows.metadata().fixed_length; + memcpy(col_prep.mutable_data(1), rows.data(1) + start_row * row_size, + num_rows * row_size); + } else if (rows.metadata().is_fixed_length) { + uint32_t row_size = rows.metadata().fixed_length; + const uint8_t* row_base = rows.data(1) + start_row * row_size; + row_base += offset_within_row; + uint8_t* col_base = col_prep.mutable_data(1); + switch (col_prep.metadata().fixed_length) { + case 1: + for (uint32_t i = 0; i < num_rows; ++i) { + col_base[i] = row_base[i * row_size]; + } + break; + case 2: + for (uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + i * row_size); + } + break; + case 4: + for (uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + i * row_size); + } + break; + case 8: + for (uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + i * row_size); + } + break; + default: + DCHECK(false); + } + } else { + const uint32_t* row_offsets = rows.offsets() + start_row; + const uint8_t* row_base = rows.data(2); + row_base += offset_within_row; + uint8_t* col_base = col_prep.mutable_data(1); + switch (col_prep.metadata().fixed_length) { + case 1: + for (uint32_t i = 0; i < num_rows; ++i) { + col_base[i] = row_base[row_offsets[i]]; + } + break; + case 2: + for (uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + row_offsets[i]); + } + break; + case 4: + for 
(uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + row_offsets[i]); + } + break; + case 8: + for (uint32_t i = 0; i < num_rows; ++i) { + reinterpret_cast(col_base)[i] = + *reinterpret_cast(row_base + row_offsets[i]); + } + break; + default: + DCHECK(false); + } + } + + if (UsesTransform(*col)) { + PostDecode(col_prep, col, ctx); + } +} + +bool KeyEncoder::EncoderBinary::IsInteger(const KeyColumnMetadata& metadata) { + bool is_fixed_length = metadata.is_fixed_length; + auto size = metadata.fixed_length; + return is_fixed_length && + (size == 0 || size == 1 || size == 2 || size == 4 || size == 8); +} + +void KeyEncoder::EncoderBinary::Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp) { + if (IsInteger(col.metadata())) { + EncoderInteger::Encode(offset_within_row, rows, col, ctx, temp); + } else { + KeyColumnArray col_prep; + if (EncoderInteger::UsesTransform(col)) { + col_prep = EncoderInteger::ArrayReplace(col, *temp); + EncoderInteger::PreEncode(col, &col_prep, ctx); + } else { + col_prep = col; + } + + bool is_row_fixed_length = rows->metadata().is_fixed_length; + +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2()) { + EncodeHelper_avx2(is_row_fixed_length, offset_within_row, rows, col); + } else { +#endif + if (is_row_fixed_length) { + EncodeImp(offset_within_row, rows, col); + } else { + EncodeImp(offset_within_row, rows, col); + } +#if defined(ARROW_HAVE_AVX2) + } +#endif + } + + DCHECK(temp->metadata().is_fixed_length); + DCHECK(temp->length() * temp->metadata().fixed_length >= + col.length() * static_cast(sizeof(uint16_t))); + + KeyColumnArray temp16bit(KeyColumnMetadata(true, sizeof(uint16_t)), col.length(), + nullptr, temp->mutable_data(1), nullptr); + ColumnMemsetNulls(offset_within_row, rows, col, ctx, &temp16bit, 0xae); +} + +void KeyEncoder::EncoderBinary::Decode(uint32_t start_row, uint32_t num_rows, + uint32_t 
offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx, KeyColumnArray* temp) { + if (IsInteger(col->metadata())) { + EncoderInteger::Decode(start_row, num_rows, offset_within_row, rows, col, ctx, temp); + } else { + KeyColumnArray col_prep; + if (EncoderInteger::UsesTransform(*col)) { + col_prep = EncoderInteger::ArrayReplace(*col, *temp); + } else { + col_prep = *col; + } + + bool is_row_fixed_length = rows.metadata().is_fixed_length; + +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2()) { + DecodeHelper_avx2(is_row_fixed_length, start_row, num_rows, offset_within_row, rows, + col); + } else { +#endif + if (is_row_fixed_length) { + DecodeImp(start_row, num_rows, offset_within_row, rows, col); + } else { + DecodeImp(start_row, num_rows, offset_within_row, rows, col); + } +#if defined(ARROW_HAVE_AVX2) + } +#endif + + if (EncoderInteger::UsesTransform(*col)) { + EncoderInteger::PostDecode(col_prep, col, ctx); + } + } +} + +template +void KeyEncoder::EncoderBinary::EncodeImp(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col) { + EncodeDecodeHelper( + 0, static_cast(col.length()), offset_within_row, rows, rows, &col, + nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) { + auto dst64 = reinterpret_cast(dst); + auto src64 = reinterpret_cast(src); + uint32_t istripe; + for (istripe = 0; istripe < length / 8; ++istripe) { + dst64[istripe] = util::SafeLoad(src64 + istripe); + } + if ((length % 8) > 0) { + uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length)); + dst64[istripe] = (dst64[istripe] & ~mask_last) | + (util::SafeLoad(src64 + istripe) & mask_last); + } + }); +} + +template +void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col) { + EncodeDecodeHelper( + start_row, num_rows, offset_within_row, &rows, nullptr, col, col, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + 
for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) { + auto dst64 = reinterpret_cast(dst); + auto src64 = reinterpret_cast(src); + util::SafeStore(dst64 + istripe, src64[istripe]); + } + }); +} + +void KeyEncoder::EncoderBinary::ColumnMemsetNulls( + uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col, + KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) { + using ColumnMemsetNullsImp_t = void (*)(uint32_t, KeyRowArray*, const KeyColumnArray&, + KeyEncoderContext*, KeyColumnArray*, uint8_t); + static const ColumnMemsetNullsImp_t ColumnMemsetNullsImp_fn[] = { + ColumnMemsetNullsImp, ColumnMemsetNullsImp, + ColumnMemsetNullsImp, ColumnMemsetNullsImp, + ColumnMemsetNullsImp, ColumnMemsetNullsImp, + ColumnMemsetNullsImp, ColumnMemsetNullsImp, + ColumnMemsetNullsImp, ColumnMemsetNullsImp}; + uint32_t col_width = col.metadata().fixed_length; + int dispatch_const = + (rows->metadata().is_fixed_length ? 5 : 0) + + (col_width == 1 ? 0 + : col_width == 2 ? 1 : col_width == 4 ? 2 : col_width == 8 ? 
3 : 4); + ColumnMemsetNullsImp_fn[dispatch_const](offset_within_row, rows, col, ctx, + temp_vector_16bit, byte_value); +} + +template +void KeyEncoder::EncoderBinary::ColumnMemsetNullsImp( + uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col, + KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) { + // Nothing to do when there are no nulls + if (!col.data(0)) { + return; + } + + const auto num_rows = static_cast(col.length()); + + // Temp vector needs space for the required number of rows + DCHECK(temp_vector_16bit->length() >= num_rows); + DCHECK(temp_vector_16bit->metadata().is_fixed_length && + temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t)); + auto temp_vector = reinterpret_cast(temp_vector_16bit->mutable_data(1)); + + // Bit vector to index vector of null positions + int num_selected; + util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, static_cast(col.length()), + col.data(0), &num_selected, temp_vector, + col.bit_offset(0)); + + for (int i = 0; i < num_selected; ++i) { + uint32_t row_id = temp_vector[i]; + + // Target binary field pointer + uint8_t* dst; + if (is_row_fixed_length) { + dst = rows->mutable_data(1) + rows->metadata().fixed_length * row_id; + } else { + dst = rows->mutable_data(2) + rows->offsets()[row_id]; + } + dst += offset_within_row; + + if (col_width == 1) { + *dst = byte_value; + } else if (col_width == 2) { + *reinterpret_cast(dst) = + (static_cast(byte_value) * static_cast(0x0101)); + } else if (col_width == 4) { + *reinterpret_cast(dst) = + (static_cast(byte_value) * static_cast(0x01010101)); + } else if (col_width == 8) { + *reinterpret_cast(dst) = + (static_cast(byte_value) * 0x0101010101010101ULL); + } else { + uint64_t value = (static_cast(byte_value) * 0x0101010101010101ULL); + uint32_t col_width_actual = col.metadata().fixed_length; + uint32_t j; + for (j = 0; j < col_width_actual / 8; ++j) { + reinterpret_cast(dst)[j] = value; + } + int tail = col_width_actual 
% 8; + if (tail) { + uint64_t mask = ~0ULL >> (8 * (8 - tail)); + reinterpret_cast(dst)[j] = + (reinterpret_cast(dst)[j] & ~mask) | (value & mask); + } + } + } +} + +void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col1, + const KeyColumnArray& col2, + KeyEncoderContext* ctx, KeyColumnArray* temp1, + KeyColumnArray* temp2) { + DCHECK(CanProcessPair(col1.metadata(), col2.metadata())); + + KeyColumnArray col_prep[2]; + if (EncoderInteger::UsesTransform(col1)) { + col_prep[0] = EncoderInteger::ArrayReplace(col1, *temp1); + EncoderInteger::PreEncode(col1, &(col_prep[0]), ctx); + } else { + col_prep[0] = col1; + } + if (EncoderInteger::UsesTransform(col2)) { + col_prep[1] = EncoderInteger::ArrayReplace(col2, *temp2); + EncoderInteger::PreEncode(col2, &(col_prep[1]), ctx); + } else { + col_prep[1] = col2; + } + + uint32_t col_width1 = col_prep[0].metadata().fixed_length; + uint32_t col_width2 = col_prep[1].metadata().fixed_length; + int log_col_width1 = + col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0; + int log_col_width2 = + col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 
1 : 0; + + bool is_row_fixed_length = rows->metadata().is_fixed_length; + + const auto num_rows = static_cast(col1.length()); + uint32_t num_processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2() && col_width1 == col_width2) { + num_processed = EncodeHelper_avx2(is_row_fixed_length, col_width1, offset_within_row, + rows, col_prep[0], col_prep[1]); + } +#endif + if (num_processed < num_rows) { + using EncodeImp_t = void (*)(uint32_t, uint32_t, KeyRowArray*, const KeyColumnArray&, + const KeyColumnArray&); + static const EncodeImp_t EncodeImp_fn[] = { + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp, + EncodeImp, EncodeImp}; + int dispatch_const = (log_col_width2 << 2) | log_col_width1; + dispatch_const += (is_row_fixed_length ? 
16 : 0); + EncodeImp_fn[dispatch_const](num_processed, offset_within_row, rows, col_prep[0], + col_prep[1]); + } +} + +template +void KeyEncoder::EncoderBinaryPair::EncodeImp(uint32_t num_rows_to_skip, + uint32_t offset_within_row, + KeyRowArray* rows, + const KeyColumnArray& col1, + const KeyColumnArray& col2) { + const uint8_t* src_A = col1.data(1); + const uint8_t* src_B = col2.data(1); + + const auto num_rows = static_cast(col1.length()); + + uint32_t fixed_length = rows->metadata().fixed_length; + const uint32_t* offsets; + uint8_t* dst_base; + if (is_row_fixed_length) { + dst_base = rows->mutable_data(1) + offset_within_row; + offsets = nullptr; + } else { + dst_base = rows->mutable_data(2) + offset_within_row; + offsets = rows->offsets(); + } + + using col1_type_const = typename std::add_const::type; + using col2_type_const = typename std::add_const::type; + + if (is_row_fixed_length) { + uint8_t* dst = dst_base + num_rows_to_skip * fixed_length; + for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) { + *reinterpret_cast(dst) = reinterpret_cast(src_A)[i]; + *reinterpret_cast(dst + sizeof(col1_type)) = + reinterpret_cast(src_B)[i]; + dst += fixed_length; + } + } else { + for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) { + uint8_t* dst = dst_base + offsets[i]; + *reinterpret_cast(dst) = reinterpret_cast(src_A)[i]; + *reinterpret_cast(dst + sizeof(col1_type)) = + reinterpret_cast(src_B)[i]; + } + } +} + +void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col1, + KeyColumnArray* col2, KeyEncoderContext* ctx, + KeyColumnArray* temp1, KeyColumnArray* temp2) { + DCHECK(CanProcessPair(col1->metadata(), col2->metadata())); + + KeyColumnArray col_prep[2]; + if (EncoderInteger::UsesTransform(*col1)) { + col_prep[0] = EncoderInteger::ArrayReplace(*col1, *temp1); + } else { + col_prep[0] = *col1; + } + if (EncoderInteger::UsesTransform(*col2)) { + col_prep[1] 
= EncoderInteger::ArrayReplace(*col2, *temp2); + } else { + col_prep[1] = *col2; + } + + uint32_t col_width1 = col_prep[0].metadata().fixed_length; + uint32_t col_width2 = col_prep[1].metadata().fixed_length; + int log_col_width1 = + col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0; + int log_col_width2 = + col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0; + + bool is_row_fixed_length = rows.metadata().is_fixed_length; + + uint32_t num_processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2() && col_width1 == col_width2) { + num_processed = + DecodeHelper_avx2(is_row_fixed_length, col_width1, start_row, num_rows, + offset_within_row, rows, &col_prep[0], &col_prep[1]); + } +#endif + if (num_processed < num_rows) { + using DecodeImp_t = void (*)(uint32_t, uint32_t, uint32_t, uint32_t, + const KeyRowArray&, KeyColumnArray*, KeyColumnArray*); + static const DecodeImp_t DecodeImp_fn[] = { + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp, + DecodeImp, DecodeImp}; + int dispatch_const = + (log_col_width2 << 2) | log_col_width1 | (is_row_fixed_length ? 
16 : 0); + DecodeImp_fn[dispatch_const](num_processed, start_row, num_rows, offset_within_row, + rows, &(col_prep[0]), &(col_prep[1])); + } + + if (EncoderInteger::UsesTransform(*col1)) { + EncoderInteger::PostDecode(col_prep[0], col1, ctx); + } + if (EncoderInteger::UsesTransform(*col2)) { + EncoderInteger::PostDecode(col_prep[1], col2, ctx); + } +} + +template +void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip, + uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray& rows, + KeyColumnArray* col1, + KeyColumnArray* col2) { + DCHECK(rows.length() >= start_row + num_rows); + DCHECK(col1->length() == num_rows && col2->length() == num_rows); + + uint8_t* dst_A = col1->mutable_data(1); + uint8_t* dst_B = col2->mutable_data(1); + + uint32_t fixed_length = rows.metadata().fixed_length; + const uint32_t* offsets; + const uint8_t* src_base; + if (is_row_fixed_length) { + src_base = rows.data(1) + fixed_length * start_row + offset_within_row; + offsets = nullptr; + } else { + src_base = rows.data(2) + offset_within_row; + offsets = rows.offsets() + start_row; + } + + using col1_type_const = typename std::add_const::type; + using col2_type_const = typename std::add_const::type; + + if (is_row_fixed_length) { + const uint8_t* src = src_base + num_rows_to_skip * fixed_length; + for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) { + reinterpret_cast(dst_A)[i] = *reinterpret_cast(src); + reinterpret_cast(dst_B)[i] = + *reinterpret_cast(src + sizeof(col1_type)); + src += fixed_length; + } + } else { + for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) { + const uint8_t* src = src_base + offsets[i]; + reinterpret_cast(dst_A)[i] = *reinterpret_cast(src); + reinterpret_cast(dst_B)[i] = + *reinterpret_cast(src + sizeof(col1_type)); + } + } +} + +void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows, + const std::vector& varbinary_cols, + KeyEncoderContext* ctx) { + DCHECK(!varbinary_cols.empty()); + + // Rows and 
columns must all be varying-length + DCHECK(!rows->metadata().is_fixed_length); + for (const auto& col : varbinary_cols) { + DCHECK(!col.metadata().is_fixed_length); + } + + const auto num_rows = static_cast(varbinary_cols[0].length()); + + uint32_t num_processed = 0; +#if defined(ARROW_HAVE_AVX2) + // Whether any of the columns has non-zero starting bit offset for non-nulls bit vector + bool has_bit_offset = false; + + // The space in columns must be exactly equal to a space for offsets in rows + DCHECK(rows->length() == num_rows); + for (const auto& col : varbinary_cols) { + DCHECK(col.length() == num_rows); + if (col.bit_offset(0) != 0) { + has_bit_offset = true; + } + } + + if (ctx->has_avx2() && !has_bit_offset) { + // Create a temp vector sized based on the number of columns + auto temp_buffer_holder = util::TempVectorHolder( + ctx->stack, static_cast(varbinary_cols.size()) * 8); + auto temp_buffer_32B_per_col = KeyColumnArray( + KeyColumnMetadata(true, sizeof(uint32_t)), varbinary_cols.size() * 8, nullptr, + reinterpret_cast(temp_buffer_holder.mutable_data()), nullptr); + + num_processed = EncodeImp_avx2(rows, varbinary_cols, &temp_buffer_32B_per_col); + } +#endif + if (num_processed < num_rows) { + EncodeImp(num_processed, rows, varbinary_cols); + } +} + +void KeyEncoder::EncoderOffsets::EncodeImp( + uint32_t num_rows_already_processed, KeyRowArray* rows, + const std::vector& varbinary_cols) { + DCHECK_GT(varbinary_cols.size(), 0); + + int row_alignment = rows->metadata().row_alignment; + int string_alignment = rows->metadata().string_alignment; + + uint32_t* row_offsets = rows->mutable_offsets(); + uint8_t* row_values = rows->mutable_data(2); + const auto num_rows = static_cast(varbinary_cols[0].length()); + + if (num_rows_already_processed == 0) { + row_offsets[0] = 0; + } + + uint32_t row_offset = row_offsets[num_rows_already_processed]; + for (uint32_t i = num_rows_already_processed; i < num_rows; ++i) { + uint32_t* varbinary_end = + 
rows->metadata().varbinary_end_array(row_values + row_offset); + + // Zero out lengths for nulls. + // Add lengths of all columns to get row size. + // Store varbinary field ends while summing their lengths. + + uint32_t offset_within_row = rows->metadata().fixed_length; + + for (size_t col = 0; col < varbinary_cols.size(); ++col) { + const uint32_t* col_offsets = varbinary_cols[col].offsets(); + uint32_t col_length = col_offsets[i + 1] - col_offsets[i]; + + const int bit_offset = varbinary_cols[col].bit_offset(0); + + const uint8_t* non_nulls = varbinary_cols[col].data(0); + if (non_nulls && BitUtil::GetBit(non_nulls, bit_offset + i) == 0) { + col_length = 0; + } + + offset_within_row += + KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment); + offset_within_row += col_length; + + varbinary_end[col] = offset_within_row; + } + + offset_within_row += + KeyRowMetadata::padding_for_alignment(offset_within_row, row_alignment); + row_offset += offset_within_row; + row_offsets[i + 1] = row_offset; + } +} + +void KeyEncoder::EncoderOffsets::Decode( + uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows, + std::vector* varbinary_cols, + const std::vector& varbinary_cols_base_offset, KeyEncoderContext* ctx) { + DCHECK(!varbinary_cols->empty()); + DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size()); + + DCHECK(!rows.metadata().is_fixed_length); + DCHECK(rows.length() >= start_row + num_rows); + for (const auto& col : *varbinary_cols) { + // Rows and columns must all be varying-length + DCHECK(!col.metadata().is_fixed_length); + // The space in columns must be exactly equal to a subset of rows selected + DCHECK(col.length() == num_rows); + } + + // Offsets of varbinary columns data within each encoded row are stored + // in the same encoded row as an array of 32-bit integers. + // This array follows immediately the data of fixed-length columns. + // There is one element for each varying-length column. 
+ // The Nth element is the sum of all the lengths of varbinary columns data in + // that row, up to and including Nth varbinary column. + + const uint32_t* row_offsets = rows.offsets() + start_row; + + // Set the base offset for each column + for (size_t col = 0; col < varbinary_cols->size(); ++col) { + uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets(); + col_offsets[0] = varbinary_cols_base_offset[col]; + } + + int string_alignment = rows.metadata().string_alignment; + + for (uint32_t i = 0; i < num_rows; ++i) { + // Find the beginning of cumulative lengths array for next row + const uint8_t* row = rows.data(2) + row_offsets[i]; + const uint32_t* varbinary_ends = rows.metadata().varbinary_end_array(row); + + // Update the offset of each column + uint32_t offset_within_row = rows.metadata().fixed_length; + for (size_t col = 0; col < varbinary_cols->size(); ++col) { + offset_within_row += + KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment); + uint32_t length = varbinary_ends[col] - offset_within_row; + offset_within_row = varbinary_ends[col]; + uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets(); + col_offsets[i + 1] = col_offsets[i] + length; + } + } +} + +void KeyEncoder::EncoderVarBinary::Encode(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col, + KeyEncoderContext* ctx) { +#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2()) { + EncodeHelper_avx2(varbinary_col_id, rows, col); + } else { +#endif + if (varbinary_col_id == 0) { + EncodeImp(varbinary_col_id, rows, col); + } else { + EncodeImp(varbinary_col_id, rows, col); + } +#if defined(ARROW_HAVE_AVX2) + } +#endif +} + +void KeyEncoder::EncoderVarBinary::Decode(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx) { + // Output column varbinary buffer needs an extra 32B + // at the end in avx2 version and 8B otherwise. 
+#if defined(ARROW_HAVE_AVX2) + if (ctx->has_avx2()) { + DecodeHelper_avx2(start_row, num_rows, varbinary_col_id, rows, col); + } else { +#endif + if (varbinary_col_id == 0) { + DecodeImp(start_row, num_rows, varbinary_col_id, rows, col); + } else { + DecodeImp(start_row, num_rows, varbinary_col_id, rows, col); + } +#if defined(ARROW_HAVE_AVX2) + } +#endif +} + +template +void KeyEncoder::EncoderVarBinary::EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col) { + EncodeDecodeHelper( + 0, static_cast(col.length()), varbinary_col_id, rows, rows, &col, nullptr, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + auto dst64 = reinterpret_cast(dst); + auto src64 = reinterpret_cast(src); + uint32_t istripe; + for (istripe = 0; istripe < length / 8; ++istripe) { + dst64[istripe] = util::SafeLoad(src64 + istripe); + } + if ((length % 8) > 0) { + uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length)); + dst64[istripe] = (dst64[istripe] & ~mask_last) | + (util::SafeLoad(src64 + istripe) & mask_last); + } + }); +} + +template +void KeyEncoder::EncoderVarBinary::DecodeImp(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, + const KeyRowArray& rows, + KeyColumnArray* col) { + EncodeDecodeHelper( + start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) { + auto dst64 = reinterpret_cast(dst); + auto src64 = reinterpret_cast(src); + util::SafeStore(dst64 + istripe, src64[istripe]); + } + }); +} + +void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows, + const std::vector& cols, + KeyEncoderContext* ctx, + KeyColumnArray* temp_vector_16bit) { + DCHECK_GT(cols.size(), 0); + const auto num_rows = static_cast(rows->length()); + + // All input columns should have the same number of rows. + // They may or may not have non-nulls bit-vectors allocated. 
+ for (const auto& col : cols) { + DCHECK(col.length() == num_rows); + } + + // Temp vector needs space for the required number of rows + DCHECK(temp_vector_16bit->length() >= num_rows); + DCHECK(temp_vector_16bit->metadata().is_fixed_length && + temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t)); + + uint8_t* null_masks = rows->null_masks(); + uint32_t null_masks_bytes_per_row = rows->metadata().null_masks_bytes_per_row; + memset(null_masks, 0, null_masks_bytes_per_row * num_rows); + for (size_t col = 0; col < cols.size(); ++col) { + const uint8_t* non_nulls = cols[col].data(0); + if (!non_nulls) { + continue; + } + int bit_offset = cols[col].bit_offset(0); + DCHECK_LT(bit_offset, 8); + int num_selected; + util::BitUtil::bits_to_indexes( + 0, ctx->hardware_flags, num_rows, non_nulls, &num_selected, + reinterpret_cast(temp_vector_16bit->mutable_data(1)), bit_offset); + for (int i = 0; i < num_selected; ++i) { + uint16_t row_id = reinterpret_cast(temp_vector_16bit->data(1))[i]; + int64_t null_masks_bit_id = row_id * null_masks_bytes_per_row * 8 + col; + BitUtil::SetBit(null_masks, null_masks_bit_id); + } + } +} + +void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows, + const KeyRowArray& rows, + std::vector* cols) { + // Every output column needs to have a space for exactly the required number + // of rows. It also needs to have non-nulls bit-vector allocated and mutable. 
+ DCHECK_GT(cols->size(), 0); + for (auto& col : *cols) { + DCHECK(col.length() == num_rows); + DCHECK(col.mutable_data(0)); + } + + const uint8_t* null_masks = rows.null_masks(); + uint32_t null_masks_bytes_per_row = rows.metadata().null_masks_bytes_per_row; + for (size_t col = 0; col < cols->size(); ++col) { + uint8_t* non_nulls = (*cols)[col].mutable_data(0); + const int bit_offset = (*cols)[col].bit_offset(0); + DCHECK_LT(bit_offset, 8); + non_nulls[0] |= 0xff << (bit_offset); + if (bit_offset + num_rows > 8) { + int bits_in_first_byte = 8 - bit_offset; + memset(non_nulls + 1, 0xff, BitUtil::BytesForBits(num_rows - bits_in_first_byte)); + } + for (uint32_t row = 0; row < num_rows; ++row) { + uint32_t null_masks_bit_id = + (start_row + row) * null_masks_bytes_per_row * 8 + static_cast(col); + bool is_set = BitUtil::GetBit(null_masks, null_masks_bit_id); + if (is_set) { + BitUtil::ClearBit(non_nulls, bit_offset + row); + } + } + } +} + +uint32_t KeyEncoder::KeyRowMetadata::num_varbinary_cols() const { + uint32_t result = 0; + for (auto column_metadata : column_metadatas) { + if (!column_metadata.is_fixed_length) { + ++result; + } + } + return result; +} + +bool KeyEncoder::KeyRowMetadata::is_compatible(const KeyRowMetadata& other) const { + if (other.num_cols() != num_cols()) { + return false; + } + if (row_alignment != other.row_alignment || + string_alignment != other.string_alignment) { + return false; + } + for (size_t i = 0; i < column_metadatas.size(); ++i) { + if (column_metadatas[i].is_fixed_length != + other.column_metadatas[i].is_fixed_length) { + return false; + } + if (column_metadatas[i].fixed_length != other.column_metadatas[i].fixed_length) { + return false; + } + } + return true; +} + +void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector( + const std::vector& cols, int in_row_alignment, + int in_string_alignment) { + column_metadatas.resize(cols.size()); + for (size_t i = 0; i < cols.size(); ++i) { + column_metadatas[i] = cols[i]; + } + + 
const auto num_cols = static_cast(cols.size()); + + // Sort columns. + // Columns are sorted based on the size in bytes of their fixed-length part. + // For the varying-length column, the fixed-length part is the 32-bit field storing + // cumulative length of varying-length fields. + // The rules are: + // a) Boolean column, marked with fixed-length 0, is considered to have fixed-length + // part of 1 byte. b) Columns with fixed-length part being power of 2 or multiple of row + // alignment precede other columns. They are sorted among themselves based on size of + // fixed-length part. c) Fixed-length columns precede varying-length columns when both + // have the same size fixed-length part. + column_order.resize(num_cols); + for (uint32_t i = 0; i < num_cols; ++i) { + column_order[i] = i; + } + std::sort( + column_order.begin(), column_order.end(), [&cols](uint32_t left, uint32_t right) { + bool is_left_pow2 = + !cols[left].is_fixed_length || ARROW_POPCOUNT64(cols[left].fixed_length) <= 1; + bool is_right_pow2 = !cols[right].is_fixed_length || + ARROW_POPCOUNT64(cols[right].fixed_length) <= 1; + bool is_left_fixedlen = cols[left].is_fixed_length; + bool is_right_fixedlen = cols[right].is_fixed_length; + uint32_t width_left = + cols[left].is_fixed_length ? cols[left].fixed_length : sizeof(uint32_t); + uint32_t width_right = + cols[right].is_fixed_length ? 
cols[right].fixed_length : sizeof(uint32_t); + if (is_left_pow2 != is_right_pow2) { + return is_left_pow2; + } + if (!is_left_pow2) { + return left < right; + } + if (width_left != width_right) { + return width_left > width_right; + } + if (is_left_fixedlen != is_right_fixedlen) { + return is_left_fixedlen; + } + return left < right; + }); + + row_alignment = in_row_alignment; + string_alignment = in_string_alignment; + varbinary_end_array_offset = 0; + + column_offsets.resize(num_cols); + uint32_t num_varbinary_cols = 0; + uint32_t offset_within_row = 0; + for (uint32_t i = 0; i < num_cols; ++i) { + const KeyColumnMetadata& col = cols[column_order[i]]; + offset_within_row += + KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment, col); + column_offsets[i] = offset_within_row; + if (!col.is_fixed_length) { + if (num_varbinary_cols == 0) { + varbinary_end_array_offset = offset_within_row; + } + DCHECK(column_offsets[i] - varbinary_end_array_offset == + num_varbinary_cols * sizeof(uint32_t)); + ++num_varbinary_cols; + offset_within_row += sizeof(uint32_t); + } else { + // Boolean column is a bit-vector, which is indicated by + // setting fixed length in column metadata to zero. + // It will be stored as a byte in output row. + if (col.fixed_length == 0) { + offset_within_row += 1; + } else { + offset_within_row += col.fixed_length; + } + } + } + + is_fixed_length = (num_varbinary_cols == 0); + fixed_length = + offset_within_row + + KeyRowMetadata::padding_for_alignment( + offset_within_row, num_varbinary_cols == 0 ? row_alignment : string_alignment); + + // We set the number of bytes per row storing null masks of individual key columns + // to be a power of two. This is not required. It could be also set to the minimal + // number of bytes required for a given number of bits (one bit per column). 
+ null_masks_bytes_per_row = 1; + while (static_cast(null_masks_bytes_per_row * 8) < num_cols) { + null_masks_bytes_per_row *= 2; + } +} + +void KeyEncoder::Init(const std::vector& cols, KeyEncoderContext* ctx, + int row_alignment, int string_alignment) { + ctx_ = ctx; + row_metadata_.FromColumnMetadataVector(cols, row_alignment, string_alignment); + uint32_t num_cols = row_metadata_.num_cols(); + uint32_t num_varbinary_cols = row_metadata_.num_varbinary_cols(); + batch_all_cols_.resize(num_cols); + batch_varbinary_cols_.resize(num_varbinary_cols); + batch_varbinary_cols_base_offsets_.resize(num_varbinary_cols); +} + +void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows, + const std::vector& cols_in) { + const auto num_cols = static_cast(cols_in.size()); + DCHECK(batch_all_cols_.size() == num_cols); + + uint32_t num_varbinary_visited = 0; + for (uint32_t i = 0; i < num_cols; ++i) { + const KeyColumnArray& col = cols_in[row_metadata_.column_order[i]]; + KeyColumnArray col_window(col, start_row, num_rows); + batch_all_cols_[i] = col_window; + if (!col.metadata().is_fixed_length) { + DCHECK(num_varbinary_visited < batch_varbinary_cols_.size()); + // If start row is zero, then base offset of varbinary column is also zero. 
+ if (start_row == 0) { + batch_varbinary_cols_base_offsets_[num_varbinary_visited] = 0; + } else { + batch_varbinary_cols_base_offsets_[num_varbinary_visited] = + col.offsets()[start_row]; + } + batch_varbinary_cols_[num_varbinary_visited++] = col_window; + } + } +} + +Status KeyEncoder::PrepareOutputForEncode(int64_t start_row, int64_t num_rows, + KeyRowArray* rows, + const std::vector& all_cols) { + int64_t num_bytes_required = 0; + + int64_t fixed_part = row_metadata_.fixed_length * num_rows; + int64_t var_part = 0; + for (const auto& col : all_cols) { + if (!col.metadata().is_fixed_length) { + DCHECK(col.length() >= start_row + num_rows); + const uint32_t* offsets = col.offsets(); + var_part += offsets[start_row + num_rows] - offsets[start_row]; + // Include maximum padding that can be added to align the start of varbinary fields. + var_part += num_rows * row_metadata_.string_alignment; + } + } + // Include maximum padding that can be added to align the start of the rows. + if (!row_metadata_.is_fixed_length) { + fixed_part += row_metadata_.row_alignment * num_rows; + } + num_bytes_required = fixed_part + var_part; + + rows->Clean(); + RETURN_NOT_OK(rows->AppendEmpty(static_cast(num_rows), + static_cast(num_bytes_required))); + + return Status::OK(); +} + +void KeyEncoder::Encode(int64_t start_row, int64_t num_rows, KeyRowArray* rows, + const std::vector& cols) { + // Prepare column array vectors + PrepareKeyColumnArrays(start_row, num_rows, cols); + + // Create two temp vectors with 16-bit elements + auto temp_buffer_holder_A = + util::TempVectorHolder(ctx_->stack, static_cast(num_rows)); + auto temp_buffer_A = KeyColumnArray( + KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr, + reinterpret_cast(temp_buffer_holder_A.mutable_data()), nullptr); + auto temp_buffer_holder_B = + util::TempVectorHolder(ctx_->stack, static_cast(num_rows)); + auto temp_buffer_B = KeyColumnArray( + KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr, + 
reinterpret_cast(temp_buffer_holder_B.mutable_data()), nullptr); + + bool is_row_fixed_length = row_metadata_.is_fixed_length; + if (!is_row_fixed_length) { + // This call will generate and fill in data for both: + // - offsets to the entire encoded arrays + // - offsets for individual varbinary fields within each row + EncoderOffsets::Encode(rows, batch_varbinary_cols_, ctx_); + + for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) { + // Memcpy varbinary fields into precomputed in the previous step + // positions in the output row buffer. + EncoderVarBinary::Encode(static_cast(i), rows, batch_varbinary_cols_[i], + ctx_); + } + } + + // Process fixed length columns + const auto num_cols = static_cast(batch_all_cols_.size()); + for (uint32_t i = 0; i < num_cols;) { + if (!batch_all_cols_[i].metadata().is_fixed_length) { + i += 1; + continue; + } + bool can_process_pair = + (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length && + EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(), + batch_all_cols_[i + 1].metadata()); + if (!can_process_pair) { + EncoderBinary::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i], + ctx_, &temp_buffer_A); + i += 1; + } else { + EncoderBinaryPair::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i], + batch_all_cols_[i + 1], ctx_, &temp_buffer_A, + &temp_buffer_B); + i += 2; + } + } + + // Process nulls + EncoderNulls::Encode(rows, batch_all_cols_, ctx_, &temp_buffer_A); +} + +void KeyEncoder::DecodeFixedLengthBuffers(int64_t start_row_input, + int64_t start_row_output, int64_t num_rows, + const KeyRowArray& rows, + std::vector* cols) { + // Prepare column array vectors + PrepareKeyColumnArrays(start_row_output, num_rows, *cols); + + // Create two temp vectors with 16-bit elements + auto temp_buffer_holder_A = + util::TempVectorHolder(ctx_->stack, static_cast(num_rows)); + auto temp_buffer_A = KeyColumnArray( + KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, 
nullptr, + reinterpret_cast(temp_buffer_holder_A.mutable_data()), nullptr); + auto temp_buffer_holder_B = + util::TempVectorHolder(ctx_->stack, static_cast(num_rows)); + auto temp_buffer_B = KeyColumnArray( + KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr, + reinterpret_cast(temp_buffer_holder_B.mutable_data()), nullptr); + + bool is_row_fixed_length = row_metadata_.is_fixed_length; + if (!is_row_fixed_length) { + EncoderOffsets::Decode(static_cast(start_row_input), + static_cast(num_rows), rows, &batch_varbinary_cols_, + batch_varbinary_cols_base_offsets_, ctx_); + } + + // Process fixed length columns + const auto num_cols = static_cast(batch_all_cols_.size()); + for (uint32_t i = 0; i < num_cols;) { + if (!batch_all_cols_[i].metadata().is_fixed_length) { + i += 1; + continue; + } + bool can_process_pair = + (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length && + EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(), + batch_all_cols_[i + 1].metadata()); + if (!can_process_pair) { + EncoderBinary::Decode(static_cast(start_row_input), + static_cast(num_rows), + row_metadata_.column_offsets[i], rows, &batch_all_cols_[i], + ctx_, &temp_buffer_A); + i += 1; + } else { + EncoderBinaryPair::Decode( + static_cast(start_row_input), static_cast(num_rows), + row_metadata_.column_offsets[i], rows, &batch_all_cols_[i], + &batch_all_cols_[i + 1], ctx_, &temp_buffer_A, &temp_buffer_B); + i += 2; + } + } + + // Process nulls + EncoderNulls::Decode(static_cast(start_row_input), + static_cast(num_rows), rows, &batch_all_cols_); +} + +void KeyEncoder::DecodeVaryingLengthBuffers(int64_t start_row_input, + int64_t start_row_output, int64_t num_rows, + const KeyRowArray& rows, + std::vector* cols) { + // Prepare column array vectors + PrepareKeyColumnArrays(start_row_output, num_rows, *cols); + + bool is_row_fixed_length = row_metadata_.is_fixed_length; + if (!is_row_fixed_length) { + for (size_t i = 0; i < batch_varbinary_cols_.size(); 
++i) { + // Memcpy varbinary fields into precomputed in the previous step + // positions in the output row buffer. + EncoderVarBinary::Decode(static_cast(start_row_input), + static_cast(num_rows), static_cast(i), + rows, &batch_varbinary_cols_[i], ctx_); + } + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_encode.h b/cpp/src/arrow/compute/exec/key_encode.h new file mode 100644 index 00000000000..e5397b9dfd4 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_encode.h @@ -0,0 +1,635 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/compute/exec/util.h" +#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace compute { + +class KeyColumnMetadata; + +/// Converts between key representation as a collection of arrays for +/// individual columns and another representation as a single array of rows +/// combining data from all columns into one value. +/// This conversion is reversible. 
+/// Row-oriented storage is beneficial when there is a need for random access +/// of individual rows and at the same time all included columns are likely to +/// be accessed together, as in the case of hash table key. +class KeyEncoder { + public: + struct KeyEncoderContext { + bool has_avx2() const { + return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0; + } + int64_t hardware_flags; + util::TempVectorStack* stack; + }; + + /// Description of a storage format of a single key column as needed + /// for the purpose of row encoding. + struct KeyColumnMetadata { + KeyColumnMetadata() = default; + KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in) + : is_fixed_length(is_fixed_length_in), fixed_length(fixed_length_in) {} + /// Is column storing a varying-length binary, using offsets array + /// to find a beginning of a value, or is it a fixed-length binary. + bool is_fixed_length; + /// For a fixed-length binary column: number of bytes per value. + /// Zero has a special meaning, indicating a bit vector with one bit per value. + /// For a varying-length binary column: number of bytes per offset. + uint32_t fixed_length; + }; + + /// Description of a storage format for rows produced by encoder. + struct KeyRowMetadata { + /// Is row a varying-length binary, using offsets array to find a beginning of a row, + /// or is it a fixed-length binary. + bool is_fixed_length; + + /// For a fixed-length binary row, common size of rows in bytes, + /// rounded up to the multiple of alignment. + /// + /// For a varying-length binary, size of all encoded fixed-length key columns, + /// including lengths of varying-length columns, rounded up to the multiple of string + /// alignment. + uint32_t fixed_length; + + /// Offset within a row to the array of 32-bit offsets within a row of + /// ends of varbinary fields. + /// Used only when the row is not fixed-length, zero for fixed-length row. + /// There are N elements for N varbinary fields. 
+ /// Each element is the offset within a row of the first byte after + /// the corresponding varbinary field bytes in that row. + /// If varbinary fields begin at aligned addresses, than the end of the previous + /// varbinary field needs to be rounded up according to the specified alignment + /// to obtain the beginning of the next varbinary field. + /// The first varbinary field starts at offset specified by fixed_length, + /// which should already be aligned. + uint32_t varbinary_end_array_offset; + + /// Fixed number of bytes per row that are used to encode null masks. + /// Null masks indicate for a single row which of its key columns are null. + /// Nth bit in the sequence of bytes assigned to a row represents null + /// information for Nth field according to the order in which they are encoded. + int null_masks_bytes_per_row; + + /// Power of 2. Every row will start at the offset aligned to that number of bytes. + int row_alignment; + + /// Power of 2. Must be no greater than row alignment. + /// Every non-power-of-2 binary field and every varbinary field bytes + /// will start aligned to that number of bytes. + int string_alignment; + + /// Metadata of encoded columns in their original order. + std::vector column_metadatas; + + /// Order in which fields are encoded. + std::vector column_order; + + /// Offsets within a row to fields in their encoding order. + std::vector column_offsets; + + /// Rounding up offset to the nearest multiple of alignment value. + /// Alignment must be a power of 2. + static inline uint32_t padding_for_alignment(uint32_t offset, + int required_alignment) { + ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1); + return static_cast((-static_cast(offset)) & + (required_alignment - 1)); + } + + /// Rounding up offset to the beginning of next column, + /// chosing required alignment based on the data type of that column. 
+ static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment, + const KeyColumnMetadata& col_metadata) { + if (!col_metadata.is_fixed_length || + ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) { + return 0; + } else { + return padding_for_alignment(offset, string_alignment); + } + } + + /// Returns an array of offsets within a row of ends of varbinary fields. + inline const uint32_t* varbinary_end_array(const uint8_t* row) const { + ARROW_DCHECK(!is_fixed_length); + return reinterpret_cast(row + varbinary_end_array_offset); + } + inline uint32_t* varbinary_end_array(uint8_t* row) const { + ARROW_DCHECK(!is_fixed_length); + return reinterpret_cast(row + varbinary_end_array_offset); + } + + /// Returns the offset within the row and length of the first varbinary field. + inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset, + uint32_t* length) const { + ARROW_DCHECK(!is_fixed_length); + *offset = fixed_length; + *length = varbinary_end_array(row)[0] - fixed_length; + } + + /// Returns the offset within the row and length of the second and further varbinary + /// fields. 
+ inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id, + uint32_t* out_offset, + uint32_t* out_length) const { + ARROW_DCHECK(!is_fixed_length); + ARROW_DCHECK(varbinary_id > 0); + const uint32_t* varbinary_end = varbinary_end_array(row); + uint32_t offset = varbinary_end[varbinary_id - 1]; + offset += padding_for_alignment(offset, string_alignment); + *out_offset = offset; + *out_length = varbinary_end[varbinary_id] - offset; + } + + uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; } + + uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; } + + uint32_t num_cols() const { return static_cast(column_metadatas.size()); } + + uint32_t num_varbinary_cols() const; + + void FromColumnMetadataVector(const std::vector& cols, + int in_row_alignment, int in_string_alignment); + + bool is_compatible(const KeyRowMetadata& other) const; + }; + + class KeyRowArray { + public: + KeyRowArray(); + Status Init(MemoryPool* pool, const KeyRowMetadata& metadata); + void Clean(); + Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append); + Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append, + const uint16_t* source_row_ids); + const KeyRowMetadata& metadata() const { return metadata_; } + int64_t length() const { return num_rows_; } + const uint8_t* data(int i) const { + ARROW_DCHECK(i >= 0 && i <= max_buffers_); + return buffers_[i]; + } + uint8_t* mutable_data(int i) { + ARROW_DCHECK(i >= 0 && i <= max_buffers_); + return mutable_buffers_[i]; + } + const uint32_t* offsets() const { return reinterpret_cast(data(1)); } + uint32_t* mutable_offsets() { return reinterpret_cast(mutable_data(1)); } + const uint8_t* null_masks() const { return null_masks_->data(); } + uint8_t* null_masks() { return null_masks_->mutable_data(); } + + bool has_any_nulls(const KeyEncoderContext* ctx) const; + + private: + Status ResizeFixedLengthBuffers(int64_t 
num_extra_rows); + Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes); + + int64_t size_null_masks(int64_t num_rows); + int64_t size_offsets(int64_t num_rows); + int64_t size_rows_fixed_length(int64_t num_rows); + int64_t size_rows_varying_length(int64_t num_bytes); + void update_buffer_pointers(); + + static constexpr int64_t padding_for_vectors = 64; + MemoryPool* pool_; + KeyRowMetadata metadata_; + /// Buffers can only expand during lifetime and never shrink. + std::unique_ptr null_masks_; + std::unique_ptr offsets_; + std::unique_ptr rows_; + static constexpr int max_buffers_ = 3; + const uint8_t* buffers_[max_buffers_]; + uint8_t* mutable_buffers_[max_buffers_]; + int64_t num_rows_; + int64_t rows_capacity_; + int64_t bytes_capacity_; + + // Mutable to allow lazy evaluation + mutable int64_t num_rows_for_has_any_nulls_; + mutable bool has_any_nulls_; + }; + + /// A lightweight description of an array representing one of key columns. + class KeyColumnArray { + public: + KeyColumnArray() = default; + /// Create as a mix of buffers according to the mask from two descriptions + /// (Nth bit is set to 0 if Nth buffer from the first input + /// should be used and is set to 1 otherwise). + /// Metadata is inherited from the first input. + KeyColumnArray(const KeyColumnMetadata& metadata, const KeyColumnArray& left, + const KeyColumnArray& right, int buffer_id_to_replace); + /// Create for reading + KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, + const uint8_t* buffer0, const uint8_t* buffer1, const uint8_t* buffer2, + int bit_offset0 = 0, int bit_offset1 = 0); + /// Create for writing + KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0, + uint8_t* buffer1, uint8_t* buffer2, int bit_offset0 = 0, + int bit_offset1 = 0); + /// Create as a window view of original description that is offset + /// by a given number of rows. 
+ /// The number of rows used in offset must be divisible by 8 + /// in order to not split bit vectors within a single byte. + KeyColumnArray(const KeyColumnArray& from, int64_t start, int64_t length); + uint8_t* mutable_data(int i) { + ARROW_DCHECK(i >= 0 && i <= max_buffers_); + return mutable_buffers_[i]; + } + const uint8_t* data(int i) const { + ARROW_DCHECK(i >= 0 && i <= max_buffers_); + return buffers_[i]; + } + uint32_t* mutable_offsets() { return reinterpret_cast(mutable_data(1)); } + const uint32_t* offsets() const { return reinterpret_cast(data(1)); } + const KeyColumnMetadata& metadata() const { return metadata_; } + int64_t length() const { return length_; } + int bit_offset(int i) const { + ARROW_DCHECK(i >= 0 && i < max_buffers_); + return bit_offset_[i]; + } + + private: + static constexpr int max_buffers_ = 3; + const uint8_t* buffers_[max_buffers_]; + uint8_t* mutable_buffers_[max_buffers_]; + KeyColumnMetadata metadata_; + int64_t length_; + // Starting bit offset within the first byte (between 0 and 7) + // to be used when accessing buffers that store bit vectors. + int bit_offset_[max_buffers_ - 1]; + }; + + void Init(const std::vector& cols, KeyEncoderContext* ctx, + int row_alignment, int string_alignment); + + const KeyRowMetadata& row_metadata() { return row_metadata_; } + + /// Find out the required sizes of all buffers output buffers for encoding + /// (including varying-length buffers). + /// Use that information to resize provided row array so that it can fit + /// encoded data. + Status PrepareOutputForEncode(int64_t start_input_row, int64_t num_input_rows, + KeyRowArray* rows, + const std::vector& all_cols); + + /// Encode a window of column oriented data into the entire output + /// row oriented storage. + /// The output buffers for encoding need to be correctly sized before + /// starting encoding. 
+ void Encode(int64_t start_input_row, int64_t num_input_rows, KeyRowArray* rows, + const std::vector& cols); + + /// Decode a window of row oriented data into a corresponding + /// window of column oriented storage. + /// The output buffers need to be correctly allocated and sized before + /// calling each method. + /// For that reason decoding is split into two functions. + /// The output of the first one, that processes everything except for + /// varying length buffers, can be used to find out required varying + /// length buffers sizes. + void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output, + int64_t num_rows, const KeyRowArray& rows, + std::vector* cols); + + void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output, + int64_t num_rows, const KeyRowArray& rows, + std::vector* cols); + + private: + /// Prepare column array vectors. + /// Output column arrays represent a range of input column arrays + /// specified by starting row and number of rows. 
+ /// Three vectors are generated: + /// - all columns + /// - fixed-length columns only + /// - varying-length columns only + void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows, + const std::vector& cols_in); + + class TransformBoolean { + public: + static KeyColumnArray ArrayReplace(const KeyColumnArray& column, + const KeyColumnArray& temp); + static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output, + KeyEncoderContext* ctx); + static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output, + KeyEncoderContext* ctx); + }; + + class EncoderInteger { + public: + static void Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp); + static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx, KeyColumnArray* temp); + static bool UsesTransform(const KeyColumnArray& column); + static KeyColumnArray ArrayReplace(const KeyColumnArray& column, + const KeyColumnArray& temp); + static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output, + KeyEncoderContext* ctx); + static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output, + KeyEncoderContext* ctx); + + private: + static bool IsBoolean(const KeyColumnMetadata& metadata); + }; + + class EncoderBinary { + public: + static void Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp); + static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx, KeyColumnArray* temp); + static bool IsInteger(const KeyColumnMetadata& metadata); + + private: + template + static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray* rows_const, + KeyRowArray* 
rows_mutable_maybe_null, + const KeyColumnArray* col_const, + KeyColumnArray* col_mutable_maybe_null, + COPY_FN copy_fn); + template + static void EncodeImp(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col); + template + static void DecodeImp(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, const KeyRowArray& rows, + KeyColumnArray* col); +#if defined(ARROW_HAVE_AVX2) + static void EncodeHelper_avx2(bool is_row_fixed_length, uint32_t offset_within_row, + KeyRowArray* rows, const KeyColumnArray& col); + static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row, + uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col); + template + static void EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col); + template + static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, const KeyRowArray& rows, + KeyColumnArray* col); +#endif + static void ColumnMemsetNulls(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp_vector_16bit, uint8_t byte_value); + template + static void ColumnMemsetNullsImp(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx, + KeyColumnArray* temp_vector_16bit, + uint8_t byte_value); + }; + + class EncoderBinaryPair { + public: + static bool CanProcessPair(const KeyColumnMetadata& col1, + const KeyColumnMetadata& col2) { + return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2); + } + static void Encode(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col1, const KeyColumnArray& col2, + KeyEncoderContext* ctx, KeyColumnArray* temp1, + KeyColumnArray* temp2); + static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col1, + KeyColumnArray* col2, 
KeyEncoderContext* ctx, + KeyColumnArray* temp1, KeyColumnArray* temp2); + + private: + template + static void EncodeImp(uint32_t num_rows_to_skip, uint32_t offset_within_row, + KeyRowArray* rows, const KeyColumnArray& col1, + const KeyColumnArray& col2); + template + static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row, + uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col1, + KeyColumnArray* col2); +#if defined(ARROW_HAVE_AVX2) + static uint32_t EncodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width, + uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col1, + const KeyColumnArray& col2); + static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width, + uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, const KeyRowArray& rows, + KeyColumnArray* col1, KeyColumnArray* col2); + template + static uint32_t EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows, + const KeyColumnArray& col1, + const KeyColumnArray& col2); + template + static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, const KeyRowArray& rows, + KeyColumnArray* col1, KeyColumnArray* col2); +#endif + }; + + class EncoderOffsets { + public: + // In order not to repeat work twice, + // encoding combines in a single pass computing of: + // a) row offsets for varying-length rows + // b) within each new row, the cumulative length array + // of varying-length values within a row. 
+ static void Encode(KeyRowArray* rows, + const std::vector& varbinary_cols, + KeyEncoderContext* ctx); + static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows, + std::vector* varbinary_cols, + const std::vector& varbinary_cols_base_offset, + KeyEncoderContext* ctx); + + private: + static void EncodeImp(uint32_t num_rows_already_processed, KeyRowArray* rows, + const std::vector& varbinary_cols); +#if defined(ARROW_HAVE_AVX2) + static uint32_t EncodeImp_avx2(KeyRowArray* rows, + const std::vector& varbinary_cols, + KeyColumnArray* temp_buffer_32B_per_col); +#endif + }; + + class EncoderVarBinary { + public: + static void Encode(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col, KeyEncoderContext* ctx); + static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id, + const KeyRowArray& rows, KeyColumnArray* col, + KeyEncoderContext* ctx); + + private: + template + static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, + const KeyRowArray* rows_const, + KeyRowArray* rows_mutable_maybe_null, + const KeyColumnArray* col_const, + KeyColumnArray* col_mutable_maybe_null, + COPY_FN copy_fn); + template + static void EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col); + template + static void DecodeImp(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, const KeyRowArray& rows, + KeyColumnArray* col); +#if defined(ARROW_HAVE_AVX2) + static void EncodeHelper_avx2(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col); + static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, const KeyRowArray& rows, + KeyColumnArray* col); + template + static void EncodeImp_avx2(uint32_t varbinary_col_id, KeyRowArray* rows, + const KeyColumnArray& col); + template + static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t 
varbinary_col_id, const KeyRowArray& rows, + KeyColumnArray* col); +#endif + }; + + class EncoderNulls { + public: + static void Encode(KeyRowArray* rows, const std::vector& cols, + KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit); + static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows, + std::vector* cols); + }; + + KeyEncoderContext* ctx_; + + // Data initialized once, based on data types of key columns + KeyRowMetadata row_metadata_; + + // Data initialized for each input batch. + // All elements are ordered according to the order of encoded fields in a row. + std::vector batch_all_cols_; + std::vector batch_varbinary_cols_; + std::vector batch_varbinary_cols_base_offsets_; +}; + +template +inline void KeyEncoder::EncoderBinary::EncodeDecodeHelper( + uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null, + const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null, + COPY_FN copy_fn) { + ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length); + uint32_t col_width = col_const->metadata().fixed_length; + + if (is_row_fixed_length) { + uint32_t row_width = rows_const->metadata().fixed_length; + for (uint32_t i = 0; i < num_rows; ++i) { + const uint8_t* src; + uint8_t* dst; + if (is_encoding) { + src = col_const->data(1) + col_width * i; + dst = rows_mutable_maybe_null->mutable_data(1) + row_width * (start_row + i) + + offset_within_row; + } else { + src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row; + dst = col_mutable_maybe_null->mutable_data(1) + col_width * i; + } + copy_fn(dst, src, col_width); + } + } else { + const uint32_t* row_offsets = rows_const->offsets(); + for (uint32_t i = 0; i < num_rows; ++i) { + const uint8_t* src; + uint8_t* dst; + if (is_encoding) { + src = col_const->data(1) + col_width * i; + dst = rows_mutable_maybe_null->mutable_data(2) + row_offsets[start_row + i] + + 
offset_within_row; + } else { + src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row; + dst = col_mutable_maybe_null->mutable_data(1) + col_width * i; + } + copy_fn(dst, src, col_width); + } + } +} + +template +inline void KeyEncoder::EncoderVarBinary::EncodeDecodeHelper( + uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id, + const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null, + const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null, + COPY_FN copy_fn) { + // Column and rows need to be varying length + ARROW_DCHECK(!rows_const->metadata().is_fixed_length && + !col_const->metadata().is_fixed_length); + + const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row; + const uint32_t* col_offsets = col_const->offsets(); + + uint32_t col_offset_next = col_offsets[0]; + for (uint32_t i = 0; i < num_rows; ++i) { + uint32_t col_offset = col_offset_next; + col_offset_next = col_offsets[i + 1]; + + uint32_t row_offset = row_offsets_for_batch[i]; + const uint8_t* row = rows_const->data(2) + row_offset; + + uint32_t offset_within_row; + uint32_t length; + if (first_varbinary_col) { + rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row, + &length); + } else { + rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id, + &offset_within_row, &length); + } + + row_offset += offset_within_row; + + const uint8_t* src; + uint8_t* dst; + if (is_encoding) { + src = col_const->data(2) + col_offset; + dst = rows_mutable_maybe_null->mutable_data(2) + row_offset; + } else { + src = rows_const->data(2) + row_offset; + dst = col_mutable_maybe_null->mutable_data(2) + col_offset; + } + copy_fn(dst, src, length); + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_encode_avx2.cc b/cpp/src/arrow/compute/exec/key_encode_avx2.cc new file mode 100644 index 00000000000..d875412cf88 --- /dev/null +++ 
b/cpp/src/arrow/compute/exec/key_encode_avx2.cc @@ -0,0 +1,545 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/compute/exec/key_encode.h" + +namespace arrow { +namespace compute { + +#if defined(ARROW_HAVE_AVX2) + +inline __m256i set_first_n_bytes_avx2(int n) { + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL; + constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL; + + return _mm256_cmpgt_epi8(_mm256_set1_epi8(n), + _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15, + kByteSequence16To23, kByteSequence24To31)); +} + +inline __m256i inclusive_prefix_sum_32bit_avx2(__m256i x) { + x = _mm256_add_epi32( + x, _mm256_permutevar8x32_epi32( + _mm256_andnot_si256(_mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0xffffffff), x), + _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6))); + x = _mm256_add_epi32( + x, _mm256_permute4x64_epi64( + _mm256_andnot_si256( + _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0xffffffff, 0xffffffff), x), + 0x93)); // 0b10010011 + x = _mm256_add_epi32( + x, _mm256_permute4x64_epi64( + _mm256_andnot_si256( + _mm256_setr_epi32(0, 0, 
0, 0, 0, 0, 0xffffffff, 0xffffffff), x), + 0x4f)); // 0b01001111 + return x; +} + +void KeyEncoder::EncoderBinary::EncodeHelper_avx2(bool is_row_fixed_length, + uint32_t offset_within_row, + KeyRowArray* rows, + const KeyColumnArray& col) { + if (is_row_fixed_length) { + EncodeImp_avx2(offset_within_row, rows, col); + } else { + EncodeImp_avx2(offset_within_row, rows, col); + } +} + +template +void KeyEncoder::EncoderBinary::EncodeImp_avx2(uint32_t offset_within_row, + KeyRowArray* rows, + const KeyColumnArray& col) { + EncodeDecodeHelper( + 0, static_cast(col.length()), offset_within_row, rows, rows, &col, + nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) { + __m256i* dst256 = reinterpret_cast<__m256i*>(dst); + const __m256i* src256 = reinterpret_cast(src); + uint32_t istripe; + for (istripe = 0; istripe < length / 32; ++istripe) { + _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe)); + } + if ((length % 32) > 0) { + __m256i mask = set_first_n_bytes_avx2(length % 32); + _mm256_storeu_si256( + dst256 + istripe, + _mm256_blendv_epi8(_mm256_loadu_si256(dst256 + istripe), + _mm256_loadu_si256(src256 + istripe), mask)); + } + }); +} + +void KeyEncoder::EncoderBinary::DecodeHelper_avx2(bool is_row_fixed_length, + uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray& rows, + KeyColumnArray* col) { + if (is_row_fixed_length) { + DecodeImp_avx2(start_row, num_rows, offset_within_row, rows, col); + } else { + DecodeImp_avx2(start_row, num_rows, offset_within_row, rows, col); + } +} + +template +void KeyEncoder::EncoderBinary::DecodeImp_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, + const KeyRowArray& rows, + KeyColumnArray* col) { + EncodeDecodeHelper( + start_row, num_rows, offset_within_row, &rows, nullptr, col, col, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + for (uint32_t istripe = 0; istripe < (length + 31) / 32; ++istripe) { + __m256i* dst256 = 
reinterpret_cast<__m256i*>(dst); + const __m256i* src256 = reinterpret_cast(src); + _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe)); + } + }); +} + +uint32_t KeyEncoder::EncoderBinaryPair::EncodeHelper_avx2( + bool is_row_fixed_length, uint32_t col_width, uint32_t offset_within_row, + KeyRowArray* rows, const KeyColumnArray& col1, const KeyColumnArray& col2) { + using EncodeImp_avx2_t = + uint32_t (*)(uint32_t, KeyRowArray*, const KeyColumnArray&, const KeyColumnArray&); + static const EncodeImp_avx2_t EncodeImp_avx2_fn[] = { + EncodeImp_avx2, EncodeImp_avx2, EncodeImp_avx2, + EncodeImp_avx2, EncodeImp_avx2, EncodeImp_avx2, + EncodeImp_avx2, EncodeImp_avx2, + }; + int log_col_width = col_width == 8 ? 3 : col_width == 4 ? 2 : col_width == 2 ? 1 : 0; + int dispatch_const = (is_row_fixed_length ? 4 : 0) + log_col_width; + return EncodeImp_avx2_fn[dispatch_const](offset_within_row, rows, col1, col2); +} + +template +uint32_t KeyEncoder::EncoderBinaryPair::EncodeImp_avx2(uint32_t offset_within_row, + KeyRowArray* rows, + const KeyColumnArray& col1, + const KeyColumnArray& col2) { + uint32_t num_rows = static_cast(col1.length()); + ARROW_DCHECK(col_width == 1 || col_width == 2 || col_width == 4 || col_width == 8); + + const uint8_t* col_vals_A = col1.data(1); + const uint8_t* col_vals_B = col2.data(1); + uint8_t* row_vals = is_row_fixed_length ? 
rows->mutable_data(1) : rows->mutable_data(2); + + constexpr int unroll = 32 / col_width; + + uint32_t num_processed = num_rows / unroll * unroll; + + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + __m256i col_A = _mm256_loadu_si256(reinterpret_cast(col_vals_A) + i); + __m256i col_B = _mm256_loadu_si256(reinterpret_cast(col_vals_B) + i); + __m256i r0, r1; + if (col_width == 1) { + // results in 16-bit outputs in the order: 0..7, 16..23 + r0 = _mm256_unpacklo_epi8(col_A, col_B); + // results in 16-bit outputs in the order: 8..15, 24..31 + r1 = _mm256_unpackhi_epi8(col_A, col_B); + } else if (col_width == 2) { + // results in 32-bit outputs in the order: 0..3, 8..11 + r0 = _mm256_unpacklo_epi16(col_A, col_B); + // results in 32-bit outputs in the order: 4..7, 12..15 + r1 = _mm256_unpackhi_epi16(col_A, col_B); + } else if (col_width == 4) { + // results in 64-bit outputs in the order: 0..1, 4..5 + r0 = _mm256_unpacklo_epi32(col_A, col_B); + // results in 64-bit outputs in the order: 2..3, 6..7 + r1 = _mm256_unpackhi_epi32(col_A, col_B); + } else if (col_width == 8) { + // results in 128-bit outputs in the order: 0, 2 + r0 = _mm256_unpacklo_epi64(col_A, col_B); + // results in 128-bit outputs in the order: 1, 3 + r1 = _mm256_unpackhi_epi64(col_A, col_B); + } + col_A = _mm256_permute2x128_si256(r0, r1, 0x20); + col_B = _mm256_permute2x128_si256(r0, r1, 0x31); + if (col_width == 8) { + __m128i *dst0, *dst1, *dst2, *dst3; + if (is_row_fixed_length) { + uint32_t fixed_length = rows->metadata().fixed_length; + uint8_t* dst = row_vals + offset_within_row + fixed_length * i * unroll; + dst0 = reinterpret_cast<__m128i*>(dst); + dst1 = reinterpret_cast<__m128i*>(dst + fixed_length); + dst2 = reinterpret_cast<__m128i*>(dst + fixed_length * 2); + dst3 = reinterpret_cast<__m128i*>(dst + fixed_length * 3); + } else { + const uint32_t* row_offsets = rows->offsets() + i * unroll; + uint8_t* dst = row_vals + offset_within_row; + dst0 = reinterpret_cast<__m128i*>(dst + 
row_offsets[0]); + dst1 = reinterpret_cast<__m128i*>(dst + row_offsets[1]); + dst2 = reinterpret_cast<__m128i*>(dst + row_offsets[2]); + dst3 = reinterpret_cast<__m128i*>(dst + row_offsets[3]); + } + _mm_storeu_si128(dst0, _mm256_castsi256_si128(r0)); + _mm_storeu_si128(dst1, _mm256_castsi256_si128(r1)); + _mm_storeu_si128(dst2, _mm256_extracti128_si256(r0, 1)); + _mm_storeu_si128(dst3, _mm256_extracti128_si256(r1, 1)); + + } else { + uint8_t buffer[64]; + _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer), col_A); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer) + 1, col_B); + + if (is_row_fixed_length) { + uint32_t fixed_length = rows->metadata().fixed_length; + uint8_t* dst = row_vals + offset_within_row + fixed_length * i * unroll; + for (int j = 0; j < unroll; ++j) { + if (col_width == 1) { + *reinterpret_cast(dst + fixed_length * j) = + reinterpret_cast(buffer)[j]; + } else if (col_width == 2) { + *reinterpret_cast(dst + fixed_length * j) = + reinterpret_cast(buffer)[j]; + } else if (col_width == 4) { + *reinterpret_cast(dst + fixed_length * j) = + reinterpret_cast(buffer)[j]; + } + } + } else { + const uint32_t* row_offsets = rows->offsets() + i * unroll; + uint8_t* dst = row_vals + offset_within_row; + for (int j = 0; j < unroll; ++j) { + if (col_width == 1) { + *reinterpret_cast(dst + row_offsets[j]) = + reinterpret_cast(buffer)[j]; + } else if (col_width == 2) { + *reinterpret_cast(dst + row_offsets[j]) = + reinterpret_cast(buffer)[j]; + } else if (col_width == 4) { + *reinterpret_cast(dst + row_offsets[j]) = + reinterpret_cast(buffer)[j]; + } + } + } + } + } + + return num_processed; +} + +uint32_t KeyEncoder::EncoderBinaryPair::DecodeHelper_avx2( + bool is_row_fixed_length, uint32_t col_width, uint32_t start_row, uint32_t num_rows, + uint32_t offset_within_row, const KeyRowArray& rows, KeyColumnArray* col1, + KeyColumnArray* col2) { + using DecodeImp_avx2_t = + uint32_t (*)(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + 
const KeyRowArray& rows, KeyColumnArray* col1, KeyColumnArray* col2); + static const DecodeImp_avx2_t DecodeImp_avx2_fn[] = { + DecodeImp_avx2, DecodeImp_avx2, DecodeImp_avx2, + DecodeImp_avx2, DecodeImp_avx2, DecodeImp_avx2, + DecodeImp_avx2, DecodeImp_avx2}; + int log_col_width = col_width == 8 ? 3 : col_width == 4 ? 2 : col_width == 2 ? 1 : 0; + int dispatch_const = log_col_width | (is_row_fixed_length ? 4 : 0); + return DecodeImp_avx2_fn[dispatch_const](start_row, num_rows, offset_within_row, rows, + col1, col2); +} + +template +uint32_t KeyEncoder::EncoderBinaryPair::DecodeImp_avx2( + uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row, + const KeyRowArray& rows, KeyColumnArray* col1, KeyColumnArray* col2) { + ARROW_DCHECK(col_width == 1 || col_width == 2 || col_width == 4 || col_width == 8); + + uint8_t* col_vals_A = col1->mutable_data(1); + uint8_t* col_vals_B = col2->mutable_data(1); + + uint32_t fixed_length = rows.metadata().fixed_length; + const uint32_t* offsets; + const uint8_t* src_base; + if (is_row_fixed_length) { + src_base = rows.data(1) + fixed_length * start_row + offset_within_row; + offsets = nullptr; + } else { + src_base = rows.data(2) + offset_within_row; + offsets = rows.offsets() + start_row; + } + + constexpr int unroll = 32 / col_width; + + uint32_t num_processed = num_rows / unroll * unroll; + + if (col_width == 8) { + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + const __m128i *src0, *src1, *src2, *src3; + if (is_row_fixed_length) { + const uint8_t* src = src_base + (i * unroll) * fixed_length; + src0 = reinterpret_cast(src); + src1 = reinterpret_cast(src + fixed_length); + src2 = reinterpret_cast(src + fixed_length * 2); + src3 = reinterpret_cast(src + fixed_length * 3); + } else { + const uint32_t* row_offsets = offsets + i * unroll; + const uint8_t* src = src_base; + src0 = reinterpret_cast(src + row_offsets[0]); + src1 = reinterpret_cast(src + row_offsets[1]); + src2 = reinterpret_cast(src + row_offsets[2]); + 
src3 = reinterpret_cast(src + row_offsets[3]); + } + + __m256i r0 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(src0)), + _mm_loadu_si128(src1), 1); + __m256i r1 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(src2)), + _mm_loadu_si128(src3), 1); + + r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 0b11011000 + r1 = _mm256_permute4x64_epi64(r1, 0xd8); + + // First 128-bit lanes from both inputs + __m256i c1 = _mm256_permute2x128_si256(r0, r1, 0x20); + // Second 128-bit lanes from both inputs + __m256i c2 = _mm256_permute2x128_si256(r0, r1, 0x31); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_A) + i, c1); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_B) + i, c2); + } + } else { + uint8_t buffer[64]; + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + if (is_row_fixed_length) { + const uint8_t* src = src_base + (i * unroll) * fixed_length; + for (int j = 0; j < unroll; ++j) { + if (col_width == 1) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + fixed_length * j); + } else if (col_width == 2) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + fixed_length * j); + } else if (col_width == 4) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + fixed_length * j); + } + } + } else { + const uint32_t* row_offsets = offsets + i * unroll; + const uint8_t* src = src_base; + for (int j = 0; j < unroll; ++j) { + if (col_width == 1) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + row_offsets[j]); + } else if (col_width == 2) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + row_offsets[j]); + } else if (col_width == 4) { + reinterpret_cast(buffer)[j] = + *reinterpret_cast(src + row_offsets[j]); + } + } + } + + __m256i r0 = _mm256_loadu_si256(reinterpret_cast(buffer)); + __m256i r1 = _mm256_loadu_si256(reinterpret_cast(buffer) + 1); + + constexpr uint64_t kByteSequence_0_2_4_6_8_10_12_14 = 0x0e0c0a0806040200ULL; + constexpr uint64_t kByteSequence_1_3_5_7_9_11_13_15 = 
0x0f0d0b0907050301ULL; + constexpr uint64_t kByteSequence_0_1_4_5_8_9_12_13 = 0x0d0c090805040100ULL; + constexpr uint64_t kByteSequence_2_3_6_7_10_11_14_15 = 0x0f0e0b0a07060302ULL; + + if (col_width == 1) { + // Collect every second byte next to each other + const __m256i shuffle_const = _mm256_setr_epi64x( + kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15, + kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15); + r0 = _mm256_shuffle_epi8(r0, shuffle_const); + r1 = _mm256_shuffle_epi8(r1, shuffle_const); + // 0b11011000 swapping second and third 64-bit lane + r0 = _mm256_permute4x64_epi64(r0, 0xd8); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); + } else if (col_width == 2) { + // Collect every second 16-bit word next to each other + const __m256i shuffle_const = _mm256_setr_epi64x( + kByteSequence_0_1_4_5_8_9_12_13, kByteSequence_2_3_6_7_10_11_14_15, + kByteSequence_0_1_4_5_8_9_12_13, kByteSequence_2_3_6_7_10_11_14_15); + r0 = _mm256_shuffle_epi8(r0, shuffle_const); + r1 = _mm256_shuffle_epi8(r1, shuffle_const); + // 0b11011000 swapping second and third 64-bit lane + r0 = _mm256_permute4x64_epi64(r0, 0xd8); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); + } else if (col_width == 4) { + // Collect every second 32-bit word next to each other + const __m256i permute_const = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); + r0 = _mm256_permutevar8x32_epi32(r0, permute_const); + r1 = _mm256_permutevar8x32_epi32(r1, permute_const); + } + + // First 128-bit lanes from both inputs + __m256i c1 = _mm256_permute2x128_si256(r0, r1, 0x20); + // Second 128-bit lanes from both inputs + __m256i c2 = _mm256_permute2x128_si256(r0, r1, 0x31); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_A) + i, c1); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_B) + i, c2); + } + } + + return num_processed; +} + +uint32_t KeyEncoder::EncoderOffsets::EncodeImp_avx2( + KeyRowArray* rows, const std::vector& varbinary_cols, + KeyColumnArray* 
temp_buffer_32B_per_col) { + ARROW_DCHECK(temp_buffer_32B_per_col->metadata().is_fixed_length && + temp_buffer_32B_per_col->metadata().fixed_length == + static_cast(sizeof(uint32_t)) && + temp_buffer_32B_per_col->length() >= + static_cast(varbinary_cols.size()) * 8); + ARROW_DCHECK(varbinary_cols.size() > 0); + + int row_alignment = rows->metadata().row_alignment; + int string_alignment = rows->metadata().string_alignment; + + uint32_t* row_offsets = rows->mutable_offsets(); + uint8_t* row_values = rows->mutable_data(2); + uint32_t num_rows = static_cast(varbinary_cols[0].length()); + + constexpr int unroll = 8; + uint32_t num_processed = num_rows / unroll * unroll; + uint32_t* temp_varbinary_ends = + reinterpret_cast(temp_buffer_32B_per_col->mutable_data(1)); + + row_offsets[0] = 0; + + __m256i row_offset = _mm256_setzero_si256(); + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + // Zero out lengths for nulls. + // Add lengths of all columns to get row size. + // Store in temp buffer varbinary field ends while summing their lengths. 
+ + __m256i offset_within_row = _mm256_set1_epi32(rows->metadata().fixed_length); + + for (size_t col = 0; col < varbinary_cols.size(); ++col) { + const uint32_t* col_offsets = varbinary_cols[col].offsets(); + __m256i col_length = _mm256_sub_epi32( + _mm256_loadu_si256(reinterpret_cast(col_offsets + 1) + i), + _mm256_loadu_si256(reinterpret_cast(col_offsets + 0) + i)); + + const uint8_t* non_nulls = varbinary_cols[col].data(0); + if (non_nulls && non_nulls[i] != 0xff) { + // Zero out lengths for values that are not null + const __m256i individual_bits = + _mm256_setr_epi32(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); + __m256i null_mask = _mm256_cmpeq_epi32( + _mm256_setzero_si256(), + _mm256_and_si256(_mm256_set1_epi32(non_nulls[i]), individual_bits)); + col_length = _mm256_andnot_si256(null_mask, col_length); + } + + __m256i padding = + _mm256_and_si256(_mm256_sub_epi32(_mm256_setzero_si256(), offset_within_row), + _mm256_set1_epi32(string_alignment - 1)); + offset_within_row = _mm256_add_epi32(offset_within_row, padding); + offset_within_row = _mm256_add_epi32(offset_within_row, col_length); + + _mm256_storeu_si256(reinterpret_cast<__m256i*>(temp_varbinary_ends) + col, + offset_within_row); + } + + __m256i padding = + _mm256_and_si256(_mm256_sub_epi32(_mm256_setzero_si256(), offset_within_row), + _mm256_set1_epi32(row_alignment - 1)); + offset_within_row = _mm256_add_epi32(offset_within_row, padding); + + // Inclusive prefix sum of 32-bit elements + __m256i row_offset_delta = inclusive_prefix_sum_32bit_avx2(offset_within_row); + row_offset = _mm256_add_epi32( + _mm256_permutevar8x32_epi32(row_offset, _mm256_set1_epi32(7)), row_offset_delta); + + _mm256_storeu_si256(reinterpret_cast<__m256i*>(row_offsets + 1) + i, row_offset); + + // Output varbinary ends for all fields in each row + for (size_t col = 0; col < varbinary_cols.size(); ++col) { + for (uint32_t row = 0; row < unroll; ++row) { + uint32_t* dst = rows->metadata().varbinary_end_array( + row_values + 
row_offsets[i * unroll + row]) + + col; + const uint32_t* src = temp_varbinary_ends + (col * unroll + row); + *dst = *src; + } + } + } + + return num_processed; +} + +void KeyEncoder::EncoderVarBinary::EncodeHelper_avx2(uint32_t varbinary_col_id, + KeyRowArray* rows, + const KeyColumnArray& col) { + if (varbinary_col_id == 0) { + EncodeImp_avx2(varbinary_col_id, rows, col); + } else { + EncodeImp_avx2(varbinary_col_id, rows, col); + } +} + +template +void KeyEncoder::EncoderVarBinary::EncodeImp_avx2(uint32_t varbinary_col_id, + KeyRowArray* rows, + const KeyColumnArray& col) { + EncodeDecodeHelper( + 0, static_cast(col.length()), varbinary_col_id, rows, rows, &col, nullptr, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + __m256i* dst256 = reinterpret_cast<__m256i*>(dst); + const __m256i* src256 = reinterpret_cast(src); + uint32_t istripe; + for (istripe = 0; istripe < length / 32; ++istripe) { + _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe)); + } + if ((length % 32) > 0) { + __m256i mask = set_first_n_bytes_avx2(length % 32); + _mm256_storeu_si256( + dst256 + istripe, + _mm256_blendv_epi8(_mm256_loadu_si256(dst256 + istripe), + _mm256_loadu_si256(src256 + istripe), mask)); + } + }); +} + +void KeyEncoder::EncoderVarBinary::DecodeHelper_avx2(uint32_t start_row, + uint32_t num_rows, + uint32_t varbinary_col_id, + const KeyRowArray& rows, + KeyColumnArray* col) { + if (varbinary_col_id == 0) { + DecodeImp_avx2(start_row, num_rows, varbinary_col_id, rows, col); + } else { + DecodeImp_avx2(start_row, num_rows, varbinary_col_id, rows, col); + } +} + +template +void KeyEncoder::EncoderVarBinary::DecodeImp_avx2(uint32_t start_row, uint32_t num_rows, + uint32_t varbinary_col_id, + const KeyRowArray& rows, + KeyColumnArray* col) { + EncodeDecodeHelper( + start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col, + [](uint8_t* dst, const uint8_t* src, int64_t length) { + for (uint32_t istripe = 0; istripe < (length + 31) / 32; 
++istripe) { + __m256i* dst256 = reinterpret_cast<__m256i*>(dst); + const __m256i* src256 = reinterpret_cast(src); + _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe)); + } + }); +} + +#endif + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_hash.cc b/cpp/src/arrow/compute/exec/key_hash.cc new file mode 100644 index 00000000000..081411e708e --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_hash.cc @@ -0,0 +1,238 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/exec/key_hash.h" + +#include + +#include +#include + +#include "arrow/compute/exec/util.h" + +namespace arrow { +namespace compute { + +inline uint32_t Hashing::avalanche_helper(uint32_t acc) { + acc ^= (acc >> 15); + acc *= PRIME32_2; + acc ^= (acc >> 13); + acc *= PRIME32_3; + acc ^= (acc >> 16); + return acc; +} + +void Hashing::avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes) { + uint32_t processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + int tail = num_keys % 8; + avalanche_avx2(num_keys - tail, hashes); + processed = num_keys - tail; + } +#endif + for (uint32_t i = processed; i < num_keys; ++i) { + hashes[i] = avalanche_helper(hashes[i]); + } +} + +inline uint32_t Hashing::combine_accumulators(const uint32_t acc1, const uint32_t acc2, + const uint32_t acc3, const uint32_t acc4) { + return ROTL(acc1, 1) + ROTL(acc2, 7) + ROTL(acc3, 12) + ROTL(acc4, 18); +} + +inline void Hashing::helper_8B(uint32_t key_length, uint32_t num_keys, + const uint8_t* keys, uint32_t* hashes) { + ARROW_DCHECK(key_length <= 8); + uint64_t mask = ~0ULL >> (8 * (8 - key_length)); + constexpr uint64_t multiplier = 14029467366897019727ULL; + uint32_t offset = 0; + for (uint32_t ikey = 0; ikey < num_keys; ++ikey) { + uint64_t x = *reinterpret_cast(keys + offset); + x &= mask; + hashes[ikey] = static_cast(BYTESWAP(x * multiplier)); + offset += key_length; + } +} + +inline void Hashing::helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys, + uint32_t& acc1, uint32_t& acc2, uint32_t& acc3, + uint32_t& acc4) { + uint64_t v1 = reinterpret_cast(keys + offset)[0]; + // We do not need to mask v1, because we will not process a stripe + // unless at least 9 bytes of it are part of the key. 
+ uint64_t v2 = reinterpret_cast(keys + offset)[1]; + v2 &= mask_hi; + uint32_t x1 = static_cast(v1); + uint32_t x2 = static_cast(v1 >> 32); + uint32_t x3 = static_cast(v2); + uint32_t x4 = static_cast(v2 >> 32); + acc1 += x1 * PRIME32_2; + acc1 = ROTL(acc1, 13) * PRIME32_1; + acc2 += x2 * PRIME32_2; + acc2 = ROTL(acc2, 13) * PRIME32_1; + acc3 += x3 * PRIME32_2; + acc3 = ROTL(acc3, 13) * PRIME32_1; + acc4 += x4 * PRIME32_2; + acc4 = ROTL(acc4, 13) * PRIME32_1; +} + +void Hashing::helper_stripes(int64_t hardware_flags, uint32_t num_keys, + uint32_t key_length, const uint8_t* keys, uint32_t* hash) { + uint32_t processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + int tail = num_keys % 2; + helper_stripes_avx2(num_keys - tail, key_length, keys, hash); + processed = num_keys - tail; + } +#endif + + // If length modulo stripe length is less than or equal 8, round down to the nearest 16B + // boundary (8B ending will be processed in a separate function), otherwise round up. + const uint32_t num_stripes = (key_length + 7) / 16; + uint64_t mask_hi = + ~0ULL >> + (8 * ((num_stripes * 16 > key_length) ? 
num_stripes * 16 - key_length : 0)); + + for (uint32_t i = processed; i < num_keys; ++i) { + uint32_t acc1, acc2, acc3, acc4; + acc1 = static_cast( + (static_cast(PRIME32_1) + static_cast(PRIME32_2)) & + 0xffffffff); + acc2 = PRIME32_2; + acc3 = 0; + acc4 = static_cast(-static_cast(PRIME32_1)); + uint32_t offset = i * key_length; + for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) { + helper_stripe(offset, ~0ULL, keys, acc1, acc2, acc3, acc4); + offset += 16; + } + helper_stripe(offset, mask_hi, keys, acc1, acc2, acc3, acc4); + hash[i] = combine_accumulators(acc1, acc2, acc3, acc4); + } +} + +inline uint32_t Hashing::helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys, + uint32_t acc) { + uint64_t v = reinterpret_cast(keys + offset)[0]; + v &= mask; + uint32_t x1 = static_cast(v); + uint32_t x2 = static_cast(v >> 32); + acc += x1 * PRIME32_3; + acc = ROTL(acc, 17) * PRIME32_4; + acc += x2 * PRIME32_3; + acc = ROTL(acc, 17) * PRIME32_4; + return acc; +} + +void Hashing::helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length, + const uint8_t* keys, uint32_t* hash) { + uint32_t processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + int tail = num_keys % 8; + helper_tails_avx2(num_keys - tail, key_length, keys, hash); + processed = num_keys - tail; + } +#endif + uint64_t mask = ~0ULL >> (8 * (((key_length % 8) == 0) ? 
0 : 8 - (key_length % 8))); + uint32_t offset = key_length / 16 * 16; + offset += processed * key_length; + for (uint32_t i = processed; i < num_keys; ++i) { + hash[i] = helper_tail(offset, mask, keys, hash[i]); + offset += key_length; + } +} + +void Hashing::hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key, + const uint8_t* keys, uint32_t* hashes) { + ARROW_DCHECK(length_key > 0); + + if (length_key <= 8) { + helper_8B(length_key, num_keys, keys, hashes); + return; + } + helper_stripes(hardware_flags, num_keys, length_key, keys, hashes); + if ((length_key % 16) > 0 && (length_key % 16) <= 8) { + helper_tails(hardware_flags, num_keys, length_key, keys, hashes); + } + avalanche(hardware_flags, num_keys, hashes); +} + +void Hashing::hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc) { + for (uint32_t i = 0; i < length / 16; ++i) { + for (int j = 0; j < 4; ++j) { + uint32_t lane = reinterpret_cast(key)[i * 4 + j]; + acc[j] += (lane * PRIME32_2); + acc[j] = ROTL(acc[j], 13); + acc[j] *= PRIME32_1; + } + } + + int tail = length % 16; + if (tail) { + uint64_t last_stripe[2]; + const uint64_t* last_stripe_base = + reinterpret_cast(key + length - (length % 16)); + last_stripe[0] = last_stripe_base[0]; + uint64_t mask = ~0ULL >> (8 * ((length + 7) / 8 * 8 - length)); + if (tail <= 8) { + last_stripe[1] = 0; + last_stripe[0] &= mask; + } else { + last_stripe[1] = last_stripe_base[1]; + last_stripe[1] &= mask; + } + for (int j = 0; j < 4; ++j) { + uint32_t lane = reinterpret_cast(last_stripe)[j]; + acc[j] += (lane * PRIME32_2); + acc[j] = ROTL(acc[j], 13); + acc[j] *= PRIME32_1; + } + } +} + +void Hashing::hash_varlen(int64_t hardware_flags, uint32_t num_rows, + const uint32_t* offsets, const uint8_t* concatenated_keys, + uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row + uint32_t* hashes) { +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + hash_varlen_avx2(num_rows, offsets, 
concatenated_keys, temp_buffer, hashes); + } else { +#endif + for (uint32_t i = 0; i < num_rows; ++i) { + uint32_t acc[4]; + acc[0] = static_cast( + (static_cast(PRIME32_1) + static_cast(PRIME32_2)) & + 0xffffffff); + acc[1] = PRIME32_2; + acc[2] = 0; + acc[3] = static_cast(-static_cast(PRIME32_1)); + uint32_t length = offsets[i + 1] - offsets[i]; + hash_varlen_helper(length, concatenated_keys + offsets[i], acc); + hashes[i] = combine_accumulators(acc[0], acc[1], acc[2], acc[3]); + } + avalanche(hardware_flags, num_rows, hashes); +#if defined(ARROW_HAVE_AVX2) + } +#endif +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_hash.h b/cpp/src/arrow/compute/exec/key_hash.h new file mode 100644 index 00000000000..7f8ab5185cc --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_hash.h @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(ARROW_HAVE_AVX2) +#include +#endif + +#include + +#include "arrow/compute/exec/util.h" + +namespace arrow { +namespace compute { + +// Implementations are based on xxh3 32-bit algorithm description from: +// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md +// +class Hashing { + public: + static void hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key, + const uint8_t* keys, uint32_t* hashes); + + static void hash_varlen(int64_t hardware_flags, uint32_t num_rows, + const uint32_t* offsets, const uint8_t* concatenated_keys, + uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row + uint32_t* hashes); + + private: + static const uint32_t PRIME32_1 = 0x9E3779B1; // 0b10011110001101110111100110110001 + static const uint32_t PRIME32_2 = 0x85EBCA77; // 0b10000101111010111100101001110111 + static const uint32_t PRIME32_3 = 0xC2B2AE3D; // 0b11000010101100101010111000111101 + static const uint32_t PRIME32_4 = 0x27D4EB2F; // 0b00100111110101001110101100101111 + static const uint32_t PRIME32_5 = 0x165667B1; // 0b00010110010101100110011110110001 + + // Avalanche + static inline uint32_t avalanche_helper(uint32_t acc); +#if defined(ARROW_HAVE_AVX2) + static void avalanche_avx2(uint32_t num_keys, uint32_t* hashes); +#endif + static void avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes); + + // Accumulator combine + static inline uint32_t combine_accumulators(const uint32_t acc1, const uint32_t acc2, + const uint32_t acc3, const uint32_t acc4); +#if defined(ARROW_HAVE_AVX2) + static inline uint64_t combine_accumulators_avx2(__m256i acc); +#endif + + // Helpers + static inline void helper_8B(uint32_t key_length, uint32_t num_keys, + const uint8_t* keys, uint32_t* hashes); + static inline void helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys, + uint32_t& acc1, uint32_t& acc2, uint32_t& acc3, + uint32_t& acc4); + static inline uint32_t helper_tail(uint32_t offset, uint64_t 
mask, const uint8_t* keys, + uint32_t acc); +#if defined(ARROW_HAVE_AVX2) + static void helper_stripes_avx2(uint32_t num_keys, uint32_t key_length, + const uint8_t* keys, uint32_t* hash); + static void helper_tails_avx2(uint32_t num_keys, uint32_t key_length, + const uint8_t* keys, uint32_t* hash); +#endif + static void helper_stripes(int64_t hardware_flags, uint32_t num_keys, + uint32_t key_length, const uint8_t* keys, uint32_t* hash); + static void helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length, + const uint8_t* keys, uint32_t* hash); + + static void hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc); +#if defined(ARROW_HAVE_AVX2) + static void hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets, + const uint8_t* concatenated_keys, + uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row + uint32_t* hashes); +#endif +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_hash_avx2.cc b/cpp/src/arrow/compute/exec/key_hash_avx2.cc new file mode 100644 index 00000000000..b58db015088 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_hash_avx2.cc @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/compute/exec/key_hash.h" + +namespace arrow { +namespace compute { + +#if defined(ARROW_HAVE_AVX2) + +void Hashing::avalanche_avx2(uint32_t num_keys, uint32_t* hashes) { + constexpr int unroll = 8; + ARROW_DCHECK(num_keys % unroll == 0); + for (uint32_t i = 0; i < num_keys / unroll; ++i) { + __m256i hash = _mm256_loadu_si256(reinterpret_cast(hashes) + i); + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 15)); + hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_2)); + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 13)); + hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_3)); + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 16)); + _mm256_storeu_si256((reinterpret_cast<__m256i*>(hashes)) + i, hash); + } +} + +inline uint64_t Hashing::combine_accumulators_avx2(__m256i acc) { + acc = _mm256_or_si256( + _mm256_sllv_epi32(acc, _mm256_setr_epi32(1, 7, 12, 18, 1, 7, 12, 18)), + _mm256_srlv_epi32(acc, _mm256_setr_epi32(32 - 1, 32 - 7, 32 - 12, 32 - 18, 32 - 1, + 32 - 7, 32 - 12, 32 - 18))); + acc = _mm256_add_epi32(acc, _mm256_shuffle_epi32(acc, 0xee)); // 0b11101110 + acc = _mm256_add_epi32(acc, _mm256_srli_epi64(acc, 32)); + acc = _mm256_permutevar8x32_epi32(acc, _mm256_setr_epi32(0, 4, 0, 0, 0, 0, 0, 0)); + uint64_t result = _mm256_extract_epi64(acc, 0); + return result; +} + +void Hashing::helper_stripes_avx2(uint32_t num_keys, uint32_t key_length, + const uint8_t* keys, uint32_t* hash) { + constexpr int unroll = 2; + ARROW_DCHECK(num_keys % unroll == 0); + + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + + const __m256i mask_last_stripe = + (key_length % 16) <= 8 + ? 
_mm256_set1_epi8(static_cast(0xffU)) + : _mm256_cmpgt_epi8(_mm256_set1_epi8(key_length % 16), + _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15, + kByteSequence0To7, kByteSequence8To15)); + + // If length modulo stripe length is less than or equal 8, round down to the nearest 16B + // boundary (8B ending will be processed in a separate function), otherwise round up. + const uint32_t num_stripes = (key_length + 7) / 16; + for (uint32_t i = 0; i < num_keys / unroll; ++i) { + __m256i acc = _mm256_setr_epi32( + static_cast((static_cast(PRIME32_1) + PRIME32_2) & + 0xffffffff), + PRIME32_2, 0, static_cast(-static_cast(PRIME32_1)), + static_cast((static_cast(PRIME32_1) + PRIME32_2) & + 0xffffffff), + PRIME32_2, 0, static_cast(-static_cast(PRIME32_1))); + auto key0 = reinterpret_cast(keys + key_length * 2 * i); + auto key1 = reinterpret_cast(keys + key_length * 2 * i + key_length); + for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) { + auto key_stripe = + _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(key0 + stripe)), + _mm_loadu_si128(key1 + stripe), 1); + acc = _mm256_add_epi32( + acc, _mm256_mullo_epi32(key_stripe, _mm256_set1_epi32(PRIME32_2))); + acc = _mm256_or_si256(_mm256_slli_epi32(acc, 13), _mm256_srli_epi32(acc, 32 - 13)); + acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_1)); + } + auto key_stripe = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_loadu_si128(key0 + num_stripes - 1)), + _mm_loadu_si128(key1 + num_stripes - 1), 1); + key_stripe = _mm256_and_si256(key_stripe, mask_last_stripe); + acc = _mm256_add_epi32(acc, + _mm256_mullo_epi32(key_stripe, _mm256_set1_epi32(PRIME32_2))); + acc = _mm256_or_si256(_mm256_slli_epi32(acc, 13), _mm256_srli_epi32(acc, 32 - 13)); + acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_1)); + uint64_t result = combine_accumulators_avx2(acc); + reinterpret_cast(hash)[i] = result; + } +} + +void Hashing::helper_tails_avx2(uint32_t num_keys, uint32_t key_length, + const 
uint8_t* keys, uint32_t* hash) { + constexpr int unroll = 8; + ARROW_DCHECK(num_keys % unroll == 0); + auto keys_i64 = reinterpret_cast(keys); + + // Process between 1 and 8 last bytes of each key, starting from 16B boundary. + // The caller needs to make sure that there are no more than 8 bytes to process after + // that 16B boundary. + uint32_t first_offset = key_length - (key_length % 16); + __m256i mask = _mm256_set1_epi64x((~0ULL) >> (8 * (8 - (key_length % 16)))); + __m256i offset = + _mm256_setr_epi32(0, key_length, key_length * 2, key_length * 3, key_length * 4, + key_length * 5, key_length * 6, key_length * 7); + offset = _mm256_add_epi32(offset, _mm256_set1_epi32(first_offset)); + __m256i offset_incr = _mm256_set1_epi32(key_length * 8); + + for (uint32_t i = 0; i < num_keys / unroll; ++i) { + auto v1 = _mm256_i32gather_epi64(keys_i64, _mm256_castsi256_si128(offset), 1); + auto v2 = _mm256_i32gather_epi64(keys_i64, _mm256_extracti128_si256(offset, 1), 1); + v1 = _mm256_and_si256(v1, mask); + v2 = _mm256_and_si256(v2, mask); + v1 = _mm256_permutevar8x32_epi32(v1, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)); + v2 = _mm256_permutevar8x32_epi32(v2, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)); + auto x1 = _mm256_permute2x128_si256(v1, v2, 0x20); + auto x2 = _mm256_permute2x128_si256(v1, v2, 0x31); + __m256i acc = _mm256_loadu_si256((reinterpret_cast(hash)) + i); + + acc = _mm256_add_epi32(acc, _mm256_mullo_epi32(x1, _mm256_set1_epi32(PRIME32_3))); + acc = _mm256_or_si256(_mm256_slli_epi32(acc, 17), _mm256_srli_epi32(acc, 32 - 17)); + acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_4)); + + acc = _mm256_add_epi32(acc, _mm256_mullo_epi32(x2, _mm256_set1_epi32(PRIME32_3))); + acc = _mm256_or_si256(_mm256_slli_epi32(acc, 17), _mm256_srli_epi32(acc, 32 - 17)); + acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_4)); + + _mm256_storeu_si256((reinterpret_cast<__m256i*>(hash)) + i, acc); + + offset = _mm256_add_epi32(offset, offset_incr); + } +} + +void 
Hashing::hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets, + const uint8_t* concatenated_keys, + uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row + uint32_t* hashes) { + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL; + + const __m128i sequence = _mm_set_epi64x(kByteSequence8To15, kByteSequence0To7); + const __m128i acc_init = _mm_setr_epi32( + static_cast((static_cast(PRIME32_1) + PRIME32_2) & 0xffffffff), + PRIME32_2, 0, static_cast(-static_cast(PRIME32_1))); + + // Variable length keys are always processed as a sequence of 16B stripes, + // with the last stripe, if extending past the end of the key, having extra bytes set to + // 0 on the fly. + for (uint32_t ikey = 0; ikey < num_rows; ++ikey) { + uint32_t begin = offsets[ikey]; + uint32_t end = offsets[ikey + 1]; + uint32_t length = end - begin; + const uint8_t* base = concatenated_keys + begin; + + __m128i acc = acc_init; + + uint32_t i; + for (i = 0; i < (length - 1) / 16; ++i) { + __m128i key_stripe = _mm_loadu_si128(reinterpret_cast(base) + i); + acc = _mm_add_epi32(acc, _mm_mullo_epi32(key_stripe, _mm_set1_epi32(PRIME32_2))); + acc = _mm_or_si128(_mm_slli_epi32(acc, 13), _mm_srli_epi32(acc, 32 - 13)); + acc = _mm_mullo_epi32(acc, _mm_set1_epi32(PRIME32_1)); + } + __m128i key_stripe = _mm_loadu_si128(reinterpret_cast(base) + i); + __m128i mask = _mm_cmpgt_epi8(_mm_set1_epi8(((length - 1) % 16) + 1), sequence); + key_stripe = _mm_and_si128(key_stripe, mask); + acc = _mm_add_epi32(acc, _mm_mullo_epi32(key_stripe, _mm_set1_epi32(PRIME32_2))); + acc = _mm_or_si128(_mm_slli_epi32(acc, 13), _mm_srli_epi32(acc, 32 - 13)); + acc = _mm_mullo_epi32(acc, _mm_set1_epi32(PRIME32_1)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(temp_buffer) + ikey, acc); + } + + // Combine accumulators and perform avalanche + constexpr int unroll = 8; + for (uint32_t i = 0; i < num_rows / unroll; ++i) { + __m256i accA = + 
_mm256_loadu_si256(reinterpret_cast(temp_buffer) + 4 * i + 0); + __m256i accB = + _mm256_loadu_si256(reinterpret_cast(temp_buffer) + 4 * i + 1); + __m256i accC = + _mm256_loadu_si256(reinterpret_cast(temp_buffer) + 4 * i + 2); + __m256i accD = + _mm256_loadu_si256(reinterpret_cast(temp_buffer) + 4 * i + 3); + // Transpose 2x 4x4 32-bit matrices + __m256i r0 = _mm256_unpacklo_epi32(accA, accB); + __m256i r1 = _mm256_unpackhi_epi32(accA, accB); + __m256i r2 = _mm256_unpacklo_epi32(accC, accD); + __m256i r3 = _mm256_unpackhi_epi32(accC, accD); + accA = _mm256_unpacklo_epi64(r0, r2); + accB = _mm256_unpackhi_epi64(r0, r2); + accC = _mm256_unpacklo_epi64(r1, r3); + accD = _mm256_unpackhi_epi64(r1, r3); + // _rotl(accA, 1) + // _rotl(accB, 7) + // _rotl(accC, 12) + // _rotl(accD, 18) + accA = _mm256_or_si256(_mm256_slli_epi32(accA, 1), _mm256_srli_epi32(accA, 32 - 1)); + accB = _mm256_or_si256(_mm256_slli_epi32(accB, 7), _mm256_srli_epi32(accB, 32 - 7)); + accC = _mm256_or_si256(_mm256_slli_epi32(accC, 12), _mm256_srli_epi32(accC, 32 - 12)); + accD = _mm256_or_si256(_mm256_slli_epi32(accD, 18), _mm256_srli_epi32(accD, 32 - 18)); + accA = _mm256_add_epi32(_mm256_add_epi32(accA, accB), _mm256_add_epi32(accC, accD)); + // avalanche + __m256i hash = accA; + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 15)); + hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_2)); + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 13)); + hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_3)); + hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 16)); + // Store. + // At this point, because of way 2x 4x4 transposition was done, output hashes are in + // order: 0, 2, 4, 6, 1, 3, 5, 7. Bring back the original order. 
+ _mm256_storeu_si256( + reinterpret_cast<__m256i*>(hashes) + i, + _mm256_permutevar8x32_epi32(hash, _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7))); + } + // Process the tail of up to 7 hashes + for (uint32_t i = num_rows - num_rows % unroll; i < num_rows; ++i) { + uint32_t* temp_buffer_base = temp_buffer + i * 4; + uint32_t acc = ROTL(temp_buffer_base[0], 1) + ROTL(temp_buffer_base[1], 7) + + ROTL(temp_buffer_base[2], 12) + ROTL(temp_buffer_base[3], 18); + + // avalanche + acc ^= (acc >> 15); + acc *= PRIME32_2; + acc ^= (acc >> 13); + acc *= PRIME32_3; + acc ^= (acc >> 16); + + hashes[i] = acc; + } +} + +#endif + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_map.cc b/cpp/src/arrow/compute/exec/key_map.cc new file mode 100644 index 00000000000..ac47c04403c --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_map.cc @@ -0,0 +1,610 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/exec/key_map.h" + +#include <memory.h> + +#include <algorithm> +#include <cstdint> + +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" +#include "arrow/util/ubsan.h" + +namespace arrow { + +using BitUtil::CountLeadingZeros; + +namespace compute { + +constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL; + +// Search status bytes inside a block of 8 slots (64-bit word). +// Try to find a slot that contains a 7-bit stamp matching the one provided. +// There are three possible outcomes: +// 1. A matching slot is found. +// -> Return its index between 0 and 7 and set match found flag. +// 2. A matching slot is not found and there is an empty slot in the block. +// -> Return the index of the first empty slot and clear match found flag. +// 3. A matching slot is not found and there are no empty slots in the block. +// -> Return 8 as the output slot index and clear match found flag. +// +// Optionally an index of the first slot to start the search from can be specified. +// In this case slots before it will be ignored. +// +template <bool use_start_slot> +inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot, + int* out_slot, int* out_match_found) { + // Filled slot bytes have the highest bit set to 0 and empty slots are equal to 0x80. + uint64_t block_high_bits = block & kHighBitOfEachByte; + + // Replicate 7-bit stamp to all non-empty slots, leaving zeroes for empty slots. + uint64_t stamp_pattern = stamp * ((block_high_bits ^ kHighBitOfEachByte) >> 7); + + // If we xor this pattern with block status bytes we get in individual bytes: + // a) 0x00, for filled slots matching the stamp, + // b) 0x00 < x < 0x80, for filled slots not matching the stamp, + // c) 0x80, for empty slots.
+ uint64_t block_xor_pattern = block ^ stamp_pattern; + + // If we then add 0x7f to every byte, we get: + // a) 0x7F + // b) 0x80 <= x < 0xFF + // c) 0xFF + uint64_t match_base = block_xor_pattern + ~kHighBitOfEachByte; + + // The highest bit now tells us if we have a match (0) or not (1). + // We will negate the bits so that match is represented by a set bit. + uint64_t matches = ~match_base; + + // Clear 7 non-relevant bits in each byte. + // Also clear bytes that correspond to slots that we were supposed to + // skip due to provided start slot index. + // Note: the highest byte corresponds to the first slot. + if (use_start_slot) { + matches &= kHighBitOfEachByte >> (8 * start_slot); + } else { + matches &= kHighBitOfEachByte; + } + + // We get 0 if there are no matches + *out_match_found = (matches == 0 ? 0 : 1); + + // Now if we or with the highest bits of the block and scan zero bits in reverse, + // we get 8x slot index that we were looking for. + // This formula works in all three cases a), b) and c). + *out_slot = static_cast(CountLeadingZeros(matches | block_high_bits) >> 3); +} + +// This call follows the call to search_block. +// The input slot index is the output returned by it, which is a value from 0 to 8, +// with 8 indicating that both: no match was found and there were no empty slots. +// +// If the slot corresponds to a non-empty slot return a group id associated with it. +// Otherwise return any group id from any of the slots or +// zero, which is the default value stored in empty slots. +// +inline uint64_t SwissTable::extract_group_id(const uint8_t* block_ptr, int slot, + uint64_t group_id_mask) { + // Input slot can be equal to 8, in which case we need to output any valid group id + // value, so we take the one from slot 0 in the block. + int clamped_slot = slot & 7; + + // Group id values for all 8 slots in the block are bit-packed and follow the status + // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. 
In + // that case we can extract group id using aligned 64-bit word access. + int num_groupid_bits = static_cast(ARROW_POPCOUNT64(group_id_mask)); + ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 || + num_groupid_bits == 32 || num_groupid_bits == 64); + + int bit_offset = clamped_slot * num_groupid_bits; + const uint64_t* group_id_bytes = + reinterpret_cast(block_ptr) + 1 + (bit_offset >> 6); + uint64_t group_id = (*group_id_bytes >> (bit_offset & 63)) & group_id_mask; + + return group_id; +} + +// Return global slot id (the index including the information about the block) +// where the search should continue if the first comparison fails. +// This function always follows search_block and receives the slot id returned by it. +// +inline uint64_t SwissTable::next_slot_to_visit(uint64_t block_index, int slot, + int match_found) { + // The result should be taken modulo the number of all slots in all blocks, + // but here we allow it to take a value one above the last slot index. + // Modulo operation is postponed to later. + return block_index * 8 + slot + match_found; +} + +// Implements first (fast-path, optimistic) lookup. +// Searches for a match only within the start block and +// trying only the first slot with a matching stamp. +// +// Comparison callback needed for match verification is done outside of this function. +// Match bit vector filled by it only indicates finding a matching stamp in a slot. +// +template +void SwissTable::lookup_1(const uint16_t* selection, const int num_keys, + const uint32_t* hashes, uint8_t* out_match_bitvector, + uint32_t* out_groupids, uint32_t* out_slot_ids) { + // Clear the output bit vector + memset(out_match_bitvector, 0, (num_keys + 7) / 8); + + // Based on the size of the table, prepare bit number constants. 
+ uint32_t stamp_mask = (1 << bits_stamp_) - 1; + int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_); + uint32_t groupid_mask = (1 << num_groupid_bits) - 1; + + for (int i = 0; i < num_keys; ++i) { + int id; + if (use_selection) { + id = util::SafeLoad(&selection[i]); + } else { + id = i; + } + + // Extract from hash: block index and stamp + // + uint32_t hash = hashes[id]; + uint32_t iblock = hash >> (bits_hash_ - bits_stamp_ - log_blocks_); + uint32_t stamp = iblock & stamp_mask; + iblock >>= bits_stamp_; + + uint32_t num_block_bytes = num_groupid_bits + 8; + const uint8_t* blockbase = reinterpret_cast(blocks_) + + static_cast(iblock) * num_block_bytes; + uint64_t block = util::SafeLoadAs(blockbase); + + // Call helper functions to obtain the output triplet: + // - match (of a stamp) found flag + // - group id for key comparison + // - slot to resume search from in case of no match or false positive + int match_found; + int islot_in_block; + search_block(block, stamp, 0, &islot_in_block, &match_found); + uint64_t groupid = extract_group_id(blockbase, islot_in_block, groupid_mask); + ARROW_DCHECK(groupid < num_inserted_ || num_inserted_ == 0); + uint64_t islot = next_slot_to_visit(iblock, islot_in_block, match_found); + + out_match_bitvector[id / 8] |= match_found << (id & 7); + util::SafeStore(&out_groupids[id], static_cast(groupid)); + util::SafeStore(&out_slot_ids[id], static_cast(islot)); + } +} + +// How many groups we can keep in the hash table without the need for resizing. +// When we reach this limit, we need to break processing of any further rows and resize. +// +uint64_t SwissTable::num_groups_for_resize() const { + // Resize small hash tables when 50% full (up to 12KB). + // Resize large hash tables when 75% full. 
+ constexpr int log_blocks_small_ = 9; + uint64_t num_slots = 1ULL << (log_blocks_ + 3); + if (log_blocks_ <= log_blocks_small_) { + return num_slots / 2; + } else { + return num_slots * 3 / 4; + } +} + +uint64_t SwissTable::wrap_global_slot_id(uint64_t global_slot_id) { + uint64_t global_slot_id_mask = (1 << (log_blocks_ + 3)) - 1; + return global_slot_id & global_slot_id_mask; +} + +// Run a single round of slot search - comparison / insert - filter unprocessed. +// Update selection vector to reflect which items have been processed. +// Ids in selection vector do not have to be sorted. +// +Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected, + uint16_t* inout_selection, bool* out_need_resize, + uint32_t* out_group_ids, uint32_t* inout_next_slot_ids) { + auto num_groups_limit = num_groups_for_resize(); + ARROW_DCHECK(num_inserted_ < num_groups_limit); + + // Temporary arrays are of limited size. + // The input needs to be split into smaller portions if it exceeds that limit. 
+ // + ARROW_DCHECK(*inout_num_selected <= static_cast(1 << log_minibatch_)); + + // We will split input row ids into three categories: + // - needing to visit next block [0] + // - needing comparison [1] + // - inserted [2] + // + auto ids_inserted_buf = + util::TempVectorHolder(temp_stack_, *inout_num_selected); + auto ids_for_comparison_buf = + util::TempVectorHolder(temp_stack_, *inout_num_selected); + constexpr int category_nomatch = 0; + constexpr int category_cmp = 1; + constexpr int category_inserted = 2; + int num_ids[3]; + num_ids[0] = num_ids[1] = num_ids[2] = 0; + uint16_t* ids[3]{inout_selection, ids_for_comparison_buf.mutable_data(), + ids_inserted_buf.mutable_data()}; + auto push_id = [&num_ids, &ids](int category, int id) { + util::SafeStore(&ids[category][num_ids[category]++], static_cast(id)); + }; + + uint64_t num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_); + uint64_t groupid_mask = (1ULL << num_groupid_bits) - 1; + constexpr uint64_t stamp_mask = 0x7f; + uint64_t num_block_bytes = (8 + num_groupid_bits); + + uint32_t num_processed; + for (num_processed = 0; + // Second condition in for loop: + // We need to break processing and have the caller of this function + // resize hash table if we reach the limit of the number of groups present. 
+ num_processed < *inout_num_selected && + num_inserted_ + num_ids[category_inserted] < num_groups_limit; + ++num_processed) { + // row id in original batch + int id = util::SafeLoad(&inout_selection[num_processed]); + + uint64_t slot_id = wrap_global_slot_id(util::SafeLoad(&inout_next_slot_ids[id])); + uint64_t block_id = slot_id >> 3; + uint32_t hash = hashes[id]; + uint8_t* blockbase = blocks_ + num_block_bytes * block_id; + uint64_t block = *reinterpret_cast(blockbase); + uint64_t stamp = (hash >> (bits_hash_ - log_blocks_ - bits_stamp_)) & stamp_mask; + int start_slot = (slot_id & 7); + + bool isempty = (blockbase[7 - start_slot] == 0x80); + if (isempty) { + // If we reach the empty slot we insert key for new group + + blockbase[7 - start_slot] = static_cast(stamp); + uint32_t group_id = num_inserted_ + num_ids[category_inserted]; + int groupid_bit_offset = static_cast(start_slot * num_groupid_bits); + + // We assume here that the number of bits is rounded up to 8, 16, 32 or 64. + // In that case we can insert group id value using aligned 64-bit word access. + ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 || + num_groupid_bits == 32 || num_groupid_bits == 64); + uint64_t* ptr = + &reinterpret_cast(blockbase + 8)[groupid_bit_offset >> 6]; + util::SafeStore(ptr, util::SafeLoad(ptr) | (static_cast(group_id) + << (groupid_bit_offset & 63))); + + hashes_[slot_id] = hash; + util::SafeStore(&out_group_ids[id], group_id); + push_id(category_inserted, id); + } else { + // We search for a slot with a matching stamp within a single block. + // We append row id to the appropriate sequence of ids based on + // whether the match has been found or not. 
+ + int new_match_found; + int new_slot; + search_block(block, static_cast(stamp), start_slot, &new_slot, + &new_match_found); + auto new_groupid = + static_cast(extract_group_id(blockbase, new_slot, groupid_mask)); + ARROW_DCHECK(new_groupid < num_inserted_ + num_ids[category_inserted]); + new_slot = + static_cast(next_slot_to_visit(block_id, new_slot, new_match_found)); + util::SafeStore(&inout_next_slot_ids[id], new_slot); + util::SafeStore(&out_group_ids[id], new_groupid); + push_id(new_match_found, id); + } + } + + // Copy keys for newly inserted rows using callback + RETURN_NOT_OK(append_impl_(num_ids[category_inserted], ids[category_inserted])); + num_inserted_ += num_ids[category_inserted]; + + // Evaluate comparisons and append ids of rows that failed it to the non-match set. + uint32_t num_not_equal; + equal_impl_(num_ids[category_cmp], ids[category_cmp], out_group_ids, &num_not_equal, + ids[category_nomatch] + num_ids[category_nomatch]); + num_ids[category_nomatch] += num_not_equal; + + // Append ids of any unprocessed entries if we aborted processing due to the need + // to resize. + if (num_processed < *inout_num_selected) { + memmove(ids[category_nomatch] + num_ids[category_nomatch], + inout_selection + num_processed, + sizeof(uint16_t) * (*inout_num_selected - num_processed)); + num_ids[category_nomatch] += (*inout_num_selected - num_processed); + } + + *out_need_resize = (num_inserted_ == num_groups_limit); + *inout_num_selected = num_ids[category_nomatch]; + return Status::OK(); +} + +// Use hashes and callbacks to find group ids for already existing keys and +// to insert and report newly assigned group ids for new keys. +// +Status SwissTable::map(const int num_keys, const uint32_t* hashes, + uint32_t* out_groupids) { + // Temporary buffers have limited size. + // Caller is responsible for splitting larger input arrays into smaller chunks. 
+ ARROW_DCHECK(num_keys <= (1 << log_minibatch_)); + + // Allocate temporary buffers with a lifetime of this function + auto match_bitvector_buf = util::TempVectorHolder<uint8_t>(temp_stack_, num_keys); + uint8_t* match_bitvector = match_bitvector_buf.mutable_data(); + auto slot_ids_buf = util::TempVectorHolder<uint32_t>(temp_stack_, num_keys); + uint32_t* slot_ids = slot_ids_buf.mutable_data(); + auto ids_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys); + uint16_t* ids = ids_buf.mutable_data(); + uint32_t num_ids; + + // First-pass processing. + // Optimistically use simplified lookup involving only a start block to find + // a single group id candidate for every input. +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags_ & arrow::internal::CpuInfo::AVX2) { + if (log_blocks_ <= 4) { + int tail = num_keys % 32; + int delta = num_keys - tail; + lookup_1_avx2_x32(num_keys - tail, hashes, match_bitvector, out_groupids, slot_ids); + lookup_1_avx2_x8(tail, hashes + delta, match_bitvector + delta / 8, + out_groupids + delta, slot_ids + delta); + } else { + lookup_1_avx2_x8(num_keys, hashes, match_bitvector, out_groupids, slot_ids); + } + } else { +#endif + lookup_1<false>(nullptr, num_keys, hashes, match_bitvector, out_groupids, slot_ids); +#if defined(ARROW_HAVE_AVX2) + } +#endif + + int64_t num_matches = + arrow::internal::CountSetBits(match_bitvector, /*offset=*/0, num_keys); + + // After the first-pass processing count rows with matches (based on stamp comparison) + // and decide based on their percentage whether to call dense or sparse comparison + // function. Dense comparison means evaluating it for all inputs, even if the matching + // stamp was not found. It may be cheaper to evaluate comparison for all inputs if the + // extra cost of filtering is higher than the wasted processing of rows with no match. + // + // Dense comparison can only be used if there is at least one inserted key, + // because otherwise there is no key to compare to.
+ // + if (num_inserted_ > 0 && num_matches > 0 && num_matches > 3 * num_keys / 4) { + // Dense comparisons + equal_impl_(num_keys, nullptr, out_groupids, &num_ids, ids); + } else { + // Sparse comparisons that involve filtering the input set of keys + auto ids_cmp_buf = util::TempVectorHolder(temp_stack_, num_keys); + uint16_t* ids_cmp = ids_cmp_buf.mutable_data(); + int num_ids_result; + util::BitUtil::bits_split_indexes(hardware_flags_, num_keys, match_bitvector, + &num_ids_result, ids, ids_cmp); + num_ids = num_ids_result; + uint32_t num_not_equal; + equal_impl_(num_keys - num_ids, ids_cmp, out_groupids, &num_not_equal, ids + num_ids); + num_ids += num_not_equal; + } + + do { + // A single round of slow-pass (robust) lookup or insert. + // A single round ends with either a single comparison verifying the match candidate + // or inserting a new key. A single round of slow-pass may return early if we reach + // the limit of the number of groups due to inserts of new keys. In that case we need + // to resize and recalculating starting global slot ids for new bigger hash table. + bool out_of_capacity; + RETURN_NOT_OK( + lookup_2(hashes, &num_ids, ids, &out_of_capacity, out_groupids, slot_ids)); + if (out_of_capacity) { + RETURN_NOT_OK(grow_double()); + // Reset start slot ids for still unprocessed input keys. 
+ // + for (uint32_t i = 0; i < num_ids; ++i) { + // First slot in the new starting block + const int16_t id = util::SafeLoad(&ids[i]); + util::SafeStore(&slot_ids[id], (hashes[id] >> (bits_hash_ - log_blocks_)) * 8); + } + } + } while (num_ids > 0); + + return Status::OK(); +} + +Status SwissTable::grow_double() { + // Before and after metadata + int num_group_id_bits_before = num_groupid_bits_from_log_blocks(log_blocks_); + int num_group_id_bits_after = num_groupid_bits_from_log_blocks(log_blocks_ + 1); + uint64_t group_id_mask_before = ~0ULL >> (64 - num_group_id_bits_before); + int log_blocks_before = log_blocks_; + int log_blocks_after = log_blocks_ + 1; + uint64_t block_size_before = (8 + num_group_id_bits_before); + uint64_t block_size_after = (8 + num_group_id_bits_after); + uint64_t block_size_total_before = (block_size_before << log_blocks_before) + padding_; + uint64_t block_size_total_after = (block_size_after << log_blocks_after) + padding_; + uint64_t hashes_size_total_before = + (bits_hash_ / 8 * (1 << (log_blocks_before + 3))) + padding_; + uint64_t hashes_size_total_after = + (bits_hash_ / 8 * (1 << (log_blocks_after + 3))) + padding_; + constexpr uint32_t stamp_mask = (1 << bits_stamp_) - 1; + + // Allocate new buffers + uint8_t* blocks_new; + RETURN_NOT_OK(pool_->Allocate(block_size_total_after, &blocks_new)); + memset(blocks_new, 0, block_size_total_after); + uint8_t* hashes_new_8B; + uint32_t* hashes_new; + RETURN_NOT_OK(pool_->Allocate(hashes_size_total_after, &hashes_new_8B)); + hashes_new = reinterpret_cast(hashes_new_8B); + + // First pass over all old blocks. + // Reinsert entries that were not in the overflow block + // (block other than selected by hash bits corresponding to the entry). 
+ for (int i = 0; i < (1 << log_blocks_); ++i) { + // How many full slots in this block + uint8_t* block_base = blocks_ + i * block_size_before; + uint8_t* double_block_base_new = blocks_new + 2 * i * block_size_after; + uint64_t block = *reinterpret_cast(block_base); + + auto full_slots = + static_cast(CountLeadingZeros(block & kHighBitOfEachByte) >> 3); + int full_slots_new[2]; + full_slots_new[0] = full_slots_new[1] = 0; + util::SafeStore(double_block_base_new, kHighBitOfEachByte); + util::SafeStore(double_block_base_new + block_size_after, kHighBitOfEachByte); + + for (int j = 0; j < full_slots; ++j) { + uint64_t slot_id = i * 8 + j; + uint32_t hash = hashes_[slot_id]; + uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after); + bool is_overflow_entry = ((block_id_new >> 1) != static_cast(i)); + if (is_overflow_entry) { + continue; + } + + int ihalf = block_id_new & 1; + uint8_t stamp_new = + hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask; + uint64_t group_id_bit_offs = j * num_group_id_bits_before; + uint64_t group_id = + (util::SafeLoadAs(block_base + 8 + (group_id_bit_offs >> 3)) >> + (group_id_bit_offs & 7)) & + group_id_mask_before; + + uint64_t slot_id_new = i * 16 + ihalf * 8 + full_slots_new[ihalf]; + hashes_new[slot_id_new] = hash; + uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after; + block_base_new[7 - full_slots_new[ihalf]] = stamp_new; + int group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after; + uint64_t* ptr = + reinterpret_cast(block_base_new + 8 + (group_id_bit_offs_new >> 3)); + util::SafeStore(ptr, + util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7))); + full_slots_new[ihalf]++; + } + } + + // Second pass over all old blocks. + // Reinsert entries that were in an overflow block. 
+ for (int i = 0; i < (1 << log_blocks_); ++i) { + // How many full slots in this block + uint8_t* block_base = blocks_ + i * block_size_before; + uint64_t block = util::SafeLoadAs(block_base); + int full_slots = static_cast(CountLeadingZeros(block & kHighBitOfEachByte) >> 3); + + for (int j = 0; j < full_slots; ++j) { + uint64_t slot_id = i * 8 + j; + uint32_t hash = hashes_[slot_id]; + uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after); + bool is_overflow_entry = ((block_id_new >> 1) != static_cast(i)); + if (!is_overflow_entry) { + continue; + } + + uint64_t group_id_bit_offs = j * num_group_id_bits_before; + uint64_t group_id = + (util::SafeLoadAs(block_base + 8 + (group_id_bit_offs >> 3)) >> + (group_id_bit_offs & 7)) & + group_id_mask_before; + uint8_t stamp_new = + hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask; + + uint8_t* block_base_new = blocks_new + block_id_new * block_size_after; + uint64_t block_new = util::SafeLoadAs(block_base_new); + int full_slots_new = + static_cast(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3); + while (full_slots_new == 8) { + block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1); + block_base_new = blocks_new + block_id_new * block_size_after; + block_new = util::SafeLoadAs(block_base_new); + full_slots_new = + static_cast(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3); + } + + hashes_new[block_id_new * 8 + full_slots_new] = hash; + block_base_new[7 - full_slots_new] = stamp_new; + int group_id_bit_offs_new = full_slots_new * num_group_id_bits_after; + uint64_t* ptr = + reinterpret_cast(block_base_new + 8 + (group_id_bit_offs_new >> 3)); + util::SafeStore(ptr, + util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7))); + } + } + + pool_->Free(blocks_, block_size_total_before); + pool_->Free(reinterpret_cast(hashes_), hashes_size_total_before); + log_blocks_ = log_blocks_after; + blocks_ = blocks_new; + hashes_ = hashes_new; + + return Status::OK(); +} + 
+Status SwissTable::init(int64_t hardware_flags, MemoryPool* pool, + util::TempVectorStack* temp_stack, int log_minibatch, + EqualImpl equal_impl, AppendImpl append_impl) { + hardware_flags_ = hardware_flags; + pool_ = pool; + temp_stack_ = temp_stack; + log_minibatch_ = log_minibatch; + equal_impl_ = equal_impl; + append_impl_ = append_impl; + + log_blocks_ = 0; + int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_); + num_inserted_ = 0; + + const uint64_t block_bytes = 8 + num_groupid_bits; + const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_; + RETURN_NOT_OK(pool_->Allocate(slot_bytes, &blocks_)); + + // Make sure group ids are initially set to zero for all slots. + memset(blocks_, 0, slot_bytes); + + // Initialize all status bytes to represent an empty slot. + for (uint64_t i = 0; i < (static_cast(1) << log_blocks_); ++i) { + util::SafeStore(blocks_ + i * block_bytes, kHighBitOfEachByte); + } + + uint64_t num_slots = 1ULL << (log_blocks_ + 3); + const uint64_t hash_size = sizeof(uint32_t); + const uint64_t hash_bytes = hash_size * num_slots + padding_; + uint8_t* hashes8; + RETURN_NOT_OK(pool_->Allocate(hash_bytes, &hashes8)); + hashes_ = reinterpret_cast(hashes8); + + return Status::OK(); +} + +void SwissTable::cleanup() { + if (blocks_) { + int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_); + const uint64_t block_bytes = 8 + num_groupid_bits; + const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_; + pool_->Free(blocks_, slot_bytes); + blocks_ = nullptr; + } + if (hashes_) { + uint64_t num_slots = 1ULL << (log_blocks_ + 3); + const uint64_t hash_size = sizeof(uint32_t); + const uint64_t hash_bytes = hash_size * num_slots + padding_; + pool_->Free(reinterpret_cast(hashes_), hash_bytes); + hashes_ = nullptr; + } + log_blocks_ = 0; + num_inserted_ = 0; +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_map.h b/cpp/src/arrow/compute/exec/key_map.h new 
file mode 100644 index 00000000000..8c472736ec4 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_map.h @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "arrow/compute/exec/util.h" +#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" + +namespace arrow { +namespace compute { + +class SwissTable { + public: + SwissTable() = default; + ~SwissTable() { cleanup(); } + + using EqualImpl = + std::function; + using AppendImpl = std::function; + + Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack, + int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl); + void cleanup(); + + Status map(const int ckeys, const uint32_t* hashes, uint32_t* outgroupids); + + private: + // Lookup helpers + + /// \brief Scan bytes in block in reverse and stop as soon + /// as a position of interest is found. + /// + /// Positions of interest: + /// a) slot with a matching stamp is encountered, + /// b) first empty slot is encountered, + /// c) we reach the end of the block. 
+ /// + /// \param[in] block 8 byte block of hash table + /// \param[in] stamp 7 bits of hash used as a stamp + /// \param[in] start_slot Index of the first slot in the block to start search from. We + /// assume that this index always points to a non-empty slot, equivalently + /// that it comes before any empty slots. (Used only by one template + /// variant.) + /// \param[out] out_slot index corresponding to the discovered position of interest (8 + /// represents end of block). + /// \param[out] out_match_found an integer flag (0 or 1) indicating if we found a + /// matching stamp. + template + inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot, + int* out_match_found); + + /// \brief Extract group id for a given slot in a given block. + /// + /// Group ids follow in memory after 64-bit block data. + /// Maximum number of groups inserted is equal to the number + /// of all slots in all blocks, which is 8 * the number of blocks. + /// Group ids are bit packed using that maximum to determine the necessary number of + /// bits. + inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot, + uint64_t group_id_mask); + + inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found); + + inline void insert(uint8_t* block_base, uint64_t slot_id, uint32_t hash, uint8_t stamp, + uint32_t group_id); + + inline uint64_t num_groups_for_resize() const; + + inline uint64_t wrap_global_slot_id(uint64_t global_slot_id); + + // First hash table access + // Find first match in the start block if exists. + // Possible cases: + // 1. Stamp match in a block + // 2. No stamp match in a block, no empty buckets in a block + // 3. 
No stamp match in a block, empty buckets in a block + // + template + void lookup_1(const uint16_t* selection, const int num_keys, const uint32_t* hashes, + uint8_t* out_match_bitvector, uint32_t* out_group_ids, + uint32_t* out_slot_ids); +#if defined(ARROW_HAVE_AVX2) + void lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes, + uint8_t* out_match_bitvector, uint32_t* out_group_ids, + uint32_t* out_next_slot_ids); + void lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes, + uint8_t* out_match_bitvector, uint32_t* out_group_ids, + uint32_t* out_next_slot_ids); +#endif + + // Completing hash table lookup post first access + Status lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected, + uint16_t* inout_selection, bool* out_need_resize, + uint32_t* out_group_ids, uint32_t* out_next_slot_ids); + + // Resize small hash tables when 50% full (up to 8KB). + // Resize large hash tables when 75% full. + Status grow_double(); + + static int num_groupid_bits_from_log_blocks(int log_blocks) { + int required_bits = log_blocks + 3; + return required_bits <= 8 ? 8 + : required_bits <= 16 ? 16 : required_bits <= 32 ? 32 : 64; + } + + // Use 32-bit hash for now + static constexpr int bits_hash_ = 32; + + // Number of hash bits stored in slots in a block. + // The highest bits of hash determine block id. + // The next set of highest bits is a "stamp" stored in a slot in a block. + static constexpr int bits_stamp_ = 7; + + // Padding bytes added at the end of buffers for ease of SIMD access + static constexpr int padding_ = 64; + + int log_minibatch_; + // Base 2 log of the number of blocks + int log_blocks_ = 0; + // Number of keys inserted into hash table + uint32_t num_inserted_ = 0; + + // Data for blocks. + // Each block has 8 status bytes for 8 slots, followed by 8 bit packed group ids for + // these slots. In 8B status word, the order of bytes is reversed. Group ids are in + // normal order. There is 64B padding at the end. 
+ // + // 0 byte - 7 bucket | 1. byte - 6 bucket | ... + // --------------------------------------------------- + // | Empty bit* | Empty bit | + // --------------------------------------------------- + // | 7-bit hash | 7-bit hash | + // --------------------------------------------------- + // * Empty bucket has value 0x80. Non-empty bucket has highest bit set to 0. + // + uint8_t* blocks_; + + // Array of hashes of values inserted into slots. + // Undefined if the corresponding slot is empty. + // There is 64B padding at the end. + uint32_t* hashes_; + + int64_t hardware_flags_; + MemoryPool* pool_; + util::TempVectorStack* temp_stack_; + + EqualImpl equal_impl_; + AppendImpl append_impl_; +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/key_map_avx2.cc b/cpp/src/arrow/compute/exec/key_map_avx2.cc new file mode 100644 index 00000000000..a2efb4d1bb9 --- /dev/null +++ b/cpp/src/arrow/compute/exec/key_map_avx2.cc @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/compute/exec/key_map.h" + +namespace arrow { +namespace compute { + +#if defined(ARROW_HAVE_AVX2) + +// Why it is OK to round up number of rows internally: +// All of the buffers: hashes, out_match_bitvector, out_group_ids, out_next_slot_ids +// are temporary buffers of group id mapping. +// Temporary buffers are buffers that live only within the boundaries of a single +// minibatch. Temporary buffers add 64B at the end, so that SIMD code does not have to +// worry about reading and writing outside of the end of the buffer up to 64B. If the +// hashes array contains garbage after the last element, it cannot cause computation to +// fail, since any random data is a valid hash for the purpose of lookup. +// +// This is more or less translation of equivalent scalar code, adjusted for a different +// instruction set (e.g. missing leading zero count instruction). +// +void SwissTable::lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes, + uint8_t* out_match_bitvector, uint32_t* out_group_ids, + uint32_t* out_next_slot_ids) { + // Number of inputs processed together in a loop + constexpr int unroll = 8; + + const int num_group_id_bits = num_groupid_bits_from_log_blocks(log_blocks_); + uint32_t group_id_mask = ~static_cast(0) >> (32 - num_group_id_bits); + const __m256i* vhash_ptr = reinterpret_cast(hashes); + const __m256i vstamp_mask = _mm256_set1_epi32((1 << bits_stamp_) - 1); + + // TODO: explain why it is ok to process hashes outside of buffer boundaries + for (int i = 0; i < ((num_hashes + unroll - 1) / unroll); ++i) { + constexpr uint64_t kEachByteIs8 = 0x0808080808080808ULL; + constexpr uint64_t kByteSequenceOfPowersOf2 = 0x8040201008040201ULL; + + // Calculate block index and hash stamp for a byte in a block + // + __m256i vhash = _mm256_loadu_si256(vhash_ptr + i); + __m256i vblock_id = _mm256_srlv_epi32( + vhash, _mm256_set1_epi32(bits_hash_ - bits_stamp_ - log_blocks_)); + __m256i vstamp = _mm256_and_si256(vblock_id, 
vstamp_mask); + vblock_id = _mm256_srli_epi32(vblock_id, bits_stamp_); + + // We now split inputs and process 4 at a time, + // in order to process 64-bit blocks + // + __m256i vblock_offset = + _mm256_mullo_epi32(vblock_id, _mm256_set1_epi32(num_group_id_bits + 8)); + __m256i voffset_A = _mm256_and_si256(vblock_offset, _mm256_set1_epi64x(0xffffffff)); + __m256i vstamp_A = _mm256_and_si256(vstamp, _mm256_set1_epi64x(0xffffffff)); + __m256i voffset_B = _mm256_srli_epi64(vblock_offset, 32); + __m256i vstamp_B = _mm256_srli_epi64(vstamp, 32); + + auto blocks_i64 = reinterpret_cast(blocks_); + auto vblock_A = _mm256_i64gather_epi64(blocks_i64, voffset_A, 1); + auto vblock_B = _mm256_i64gather_epi64(blocks_i64, voffset_B, 1); + __m256i vblock_highbits_A = + _mm256_cmpeq_epi8(vblock_A, _mm256_set1_epi8(static_cast(0x80))); + __m256i vblock_highbits_B = + _mm256_cmpeq_epi8(vblock_B, _mm256_set1_epi8(static_cast(0x80))); + __m256i vbyte_repeat_pattern = + _mm256_setr_epi64x(0ULL, kEachByteIs8, 0ULL, kEachByteIs8); + vstamp_A = _mm256_shuffle_epi8( + vstamp_A, _mm256_or_si256(vbyte_repeat_pattern, vblock_highbits_A)); + vstamp_B = _mm256_shuffle_epi8( + vstamp_B, _mm256_or_si256(vbyte_repeat_pattern, vblock_highbits_B)); + __m256i vmatches_A = _mm256_cmpeq_epi8(vblock_A, vstamp_A); + __m256i vmatches_B = _mm256_cmpeq_epi8(vblock_B, vstamp_B); + __m256i vmatch_found = _mm256_andnot_si256( + _mm256_blend_epi32(_mm256_cmpeq_epi64(vmatches_A, _mm256_setzero_si256()), + _mm256_cmpeq_epi64(vmatches_B, _mm256_setzero_si256()), + 0xaa), // 0b10101010 + _mm256_set1_epi8(static_cast(0xff))); + vmatches_A = + _mm256_sad_epu8(_mm256_and_si256(_mm256_or_si256(vmatches_A, vblock_highbits_A), + _mm256_set1_epi64x(kByteSequenceOfPowersOf2)), + _mm256_setzero_si256()); + vmatches_B = + _mm256_sad_epu8(_mm256_and_si256(_mm256_or_si256(vmatches_B, vblock_highbits_B), + _mm256_set1_epi64x(kByteSequenceOfPowersOf2)), + _mm256_setzero_si256()); + __m256i vmatches = _mm256_or_si256(vmatches_A, 
_mm256_slli_epi64(vmatches_B, 32)); + + // We are now back to processing 8 at a time. + // Each lane contains 8-bit bit vector marking slots that are matches. + // We need to find leading zeroes count for all slots. + // + // Emulating lzcnt in lowest bytes of 32-bit elements + __m256i vgt = _mm256_cmpgt_epi32(_mm256_set1_epi32(16), vmatches); + __m256i vnext_slot_id = + _mm256_blendv_epi8(_mm256_srli_epi32(vmatches, 4), + _mm256_and_si256(vmatches, _mm256_set1_epi32(0x0f)), vgt); + vnext_slot_id = _mm256_shuffle_epi8( + _mm256_setr_epi8(4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2, 2, 1, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), + vnext_slot_id); + vnext_slot_id = + _mm256_add_epi32(_mm256_and_si256(vnext_slot_id, _mm256_set1_epi32(0xff)), + _mm256_and_si256(vgt, _mm256_set1_epi32(4))); + + // Lookup group ids + // + __m256i vgroupid_bit_offset = + _mm256_mullo_epi32(_mm256_and_si256(vnext_slot_id, _mm256_set1_epi32(7)), + _mm256_set1_epi32(num_group_id_bits)); + + // This only works for up to 25 bits per group id, since it uses 32-bit gather + // TODO: make sure this will never get called when there are more than 2^25 groups. 
+ __m256i vgroupid = + _mm256_add_epi32(_mm256_srli_epi32(vgroupid_bit_offset, 3), + _mm256_add_epi32(vblock_offset, _mm256_set1_epi32(8))); + vgroupid = _mm256_i32gather_epi32(reinterpret_cast(blocks_), vgroupid, 1); + vgroupid = _mm256_srlv_epi32( + vgroupid, _mm256_and_si256(vgroupid_bit_offset, _mm256_set1_epi32(7))); + vgroupid = _mm256_and_si256(vgroupid, _mm256_set1_epi32(group_id_mask)); + + // Convert slot id relative to the block to slot id relative to the beginnning of the + // table + // + vnext_slot_id = _mm256_add_epi32( + _mm256_add_epi32(vnext_slot_id, + _mm256_and_si256(vmatch_found, _mm256_set1_epi32(1))), + _mm256_slli_epi32(vblock_id, 3)); + + // Convert match found vector from 32-bit elements to bit vector + out_match_bitvector[i] = _pext_u32(_mm256_movemask_epi8(vmatch_found), + 0x11111111); // 0b00010001 repeated 4x + _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_group_ids) + i, vgroupid); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_next_slot_ids) + i, vnext_slot_id); + } +} + +// Take a set of 16 64-bit elements, +// Output one AVX2 register per byte (0 to 7), containing a sequence of 16 bytes, +// one from each input 64-bit word, all from the same position in 64-bit word. +// 16 bytes are replicated in lower and upper half of each output register. +// +inline void split_bytes_avx2(__m256i word0, __m256i word1, __m256i word2, __m256i word3, + __m256i& byte0, __m256i& byte1, __m256i& byte2, + __m256i& byte3, __m256i& byte4, __m256i& byte5, + __m256i& byte6, __m256i& byte7) { + __m256i word01lo = _mm256_unpacklo_epi8( + word0, word1); // {a0, e0, a1, e1, ... a7, e7, c0, g0, c1, g1, ... c7, g7} + __m256i word23lo = _mm256_unpacklo_epi8( + word2, word3); // {i0, m0, i1, m1, ... i7, m7, k0, o0, k1, o1, ... k7, o7} + __m256i word01hi = _mm256_unpackhi_epi8( + word0, word1); // {b0, f0, b1, f1, ... b7, f1, d0, h0, d1, h1, ... d7, h7} + __m256i word23hi = _mm256_unpackhi_epi8( + word2, word3); // {j0, n0, j1, n1, ... 
j7, n7, l0, p0, l1, p1, ... l7, p7} + + __m256i a = + _mm256_unpacklo_epi16(word01lo, word01hi); // {a0, e0, b0, f0, ... a3, e3, b3, f3, + // c0, g0, d0, h0, ... c3, g3, d3, h3} + __m256i b = + _mm256_unpacklo_epi16(word23lo, word23hi); // {i0, m0, j0, n0, ... i3, m3, j3, n3, + // k0, o0, l0, p0, ... k3, o3, l3, p3} + __m256i c = + _mm256_unpackhi_epi16(word01lo, word01hi); // {a4, e4, b4, f4, ... a7, e7, b7, f7, + // c4, g4, d4, h4, ... c7, g7, d7, h7} + __m256i d = + _mm256_unpackhi_epi16(word23lo, word23hi); // {i4, m4, j4, n4, ... i7, m7, j7, n7, + // k4, o4, l4, p4, ... k7, o7, l7, p7} + + __m256i byte01 = _mm256_unpacklo_epi32( + a, b); // {a0, e0, b0, f0, i0, m0, j0, n0, a1, e1, b1, f1, i1, m1, j1, n1, c0, g0, + // d0, h0, k0, o0, l0, p0, ...} + __m256i shuffle_const = + _mm256_setr_epi8(0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15, 0, 2, 8, 10, + 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15); + byte01 = _mm256_permute4x64_epi64( + byte01, 0xd8); // 11011000 b - swapping middle two 64-bit elements + byte01 = _mm256_shuffle_epi8(byte01, shuffle_const); + __m256i byte23 = _mm256_unpackhi_epi32(a, b); + byte23 = _mm256_permute4x64_epi64(byte23, 0xd8); + byte23 = _mm256_shuffle_epi8(byte23, shuffle_const); + __m256i byte45 = _mm256_unpacklo_epi32(c, d); + byte45 = _mm256_permute4x64_epi64(byte45, 0xd8); + byte45 = _mm256_shuffle_epi8(byte45, shuffle_const); + __m256i byte67 = _mm256_unpackhi_epi32(c, d); + byte67 = _mm256_permute4x64_epi64(byte67, 0xd8); + byte67 = _mm256_shuffle_epi8(byte67, shuffle_const); + + byte0 = _mm256_permute4x64_epi64(byte01, 0x44); // 01000100 b + byte1 = _mm256_permute4x64_epi64(byte01, 0xee); // 11101110 b + byte2 = _mm256_permute4x64_epi64(byte23, 0x44); // 01000100 b + byte3 = _mm256_permute4x64_epi64(byte23, 0xee); // 11101110 b + byte4 = _mm256_permute4x64_epi64(byte45, 0x44); // 01000100 b + byte5 = _mm256_permute4x64_epi64(byte45, 0xee); // 11101110 b + byte6 = _mm256_permute4x64_epi64(byte67, 0x44); // 01000100 b + byte7 = 
_mm256_permute4x64_epi64(byte67, 0xee); // 11101110 b +} + +// This one can only process a multiple of 32 values. +// The caller needs to process the remaining tail, if the input is not divisible by 32, +// using a different method. +// TODO: Explain the idea behind storing arrays in SIMD registers. +// Explain why it is faster with SIMD than using memory loads. +void SwissTable::lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes, + uint8_t* out_match_bitvector, uint32_t* out_group_ids, + uint32_t* out_next_slot_ids) { + constexpr int unroll = 32; + + // There is a limit on the number of input blocks, + // because we want to store all their data in a set of AVX2 registers. + ARROW_DCHECK(log_blocks_ <= 4); + + // Remember that block bytes and group id bytes are in opposite orders in memory of hash + // table. We put them in the same order. + __m256i vblock_byte0, vblock_byte1, vblock_byte2, vblock_byte3, vblock_byte4, + vblock_byte5, vblock_byte6, vblock_byte7; + __m256i vgroupid_byte0, vgroupid_byte1, vgroupid_byte2, vgroupid_byte3, vgroupid_byte4, + vgroupid_byte5, vgroupid_byte6, vgroupid_byte7; + // What we output if there is no match in the block + __m256i vslot_empty_or_end; + + constexpr uint32_t k4ByteSequence_0_4_8_12 = 0x0c080400; + constexpr uint32_t k4ByteSequence_1_5_9_13 = 0x0d090501; + constexpr uint32_t k4ByteSequence_2_6_10_14 = 0x0e0a0602; + constexpr uint32_t k4ByteSequence_3_7_11_15 = 0x0f0b0703; + constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL; + constexpr uint64_t kByteSequence7DownTo0 = 0x0001020304050607ULL; + constexpr uint64_t kByteSequence15DownTo8 = 0x08090A0B0C0D0E0FULL; + + // Bit unpack group ids into 1B. + // Assemble the sequence of block bytes. 
+ uint64_t block_bytes[16]; + uint64_t groupid_bytes[16]; + const int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_); + uint64_t bit_unpack_mask = ((1 << num_groupid_bits) - 1) * kEachByteIs1; + for (int i = 0; i < (1 << log_blocks_); ++i) { + uint64_t in_groupids = + *reinterpret_cast(blocks_ + (8 + num_groupid_bits) * i + 8); + uint64_t in_blockbytes = + *reinterpret_cast(blocks_ + (8 + num_groupid_bits) * i); + groupid_bytes[i] = _pdep_u64(in_groupids, bit_unpack_mask); + block_bytes[i] = in_blockbytes; + } + + // Split a sequence of 64-bit words into SIMD vectors holding individual bytes + __m256i vblock_words0 = + _mm256_loadu_si256(reinterpret_cast(block_bytes) + 0); + __m256i vblock_words1 = + _mm256_loadu_si256(reinterpret_cast(block_bytes) + 1); + __m256i vblock_words2 = + _mm256_loadu_si256(reinterpret_cast(block_bytes) + 2); + __m256i vblock_words3 = + _mm256_loadu_si256(reinterpret_cast(block_bytes) + 3); + // Reverse the bytes in blocks + __m256i vshuffle_const = + _mm256_setr_epi64x(kByteSequence7DownTo0, kByteSequence15DownTo8, + kByteSequence7DownTo0, kByteSequence15DownTo8); + vblock_words0 = _mm256_shuffle_epi8(vblock_words0, vshuffle_const); + vblock_words1 = _mm256_shuffle_epi8(vblock_words1, vshuffle_const); + vblock_words2 = _mm256_shuffle_epi8(vblock_words2, vshuffle_const); + vblock_words3 = _mm256_shuffle_epi8(vblock_words3, vshuffle_const); + split_bytes_avx2(vblock_words0, vblock_words1, vblock_words2, vblock_words3, + vblock_byte0, vblock_byte1, vblock_byte2, vblock_byte3, vblock_byte4, + vblock_byte5, vblock_byte6, vblock_byte7); + split_bytes_avx2( + _mm256_loadu_si256(reinterpret_cast(groupid_bytes) + 0), + _mm256_loadu_si256(reinterpret_cast(groupid_bytes) + 1), + _mm256_loadu_si256(reinterpret_cast(groupid_bytes) + 2), + _mm256_loadu_si256(reinterpret_cast(groupid_bytes) + 3), + vgroupid_byte0, vgroupid_byte1, vgroupid_byte2, vgroupid_byte3, vgroupid_byte4, + vgroupid_byte5, vgroupid_byte6, vgroupid_byte7); + + // 
Calculate the slot to output when there is no match in a block. + // It will be the index of the first empty slot or 8 (the number of slots in block) + // if there are no empty slots. + vslot_empty_or_end = _mm256_set1_epi8(8); + { + __m256i vis_empty; +#define CMP(VBLOCKBYTE, BYTENUM) \ + vis_empty = \ + _mm256_cmpeq_epi8(VBLOCKBYTE, _mm256_set1_epi8(static_cast(0x80))); \ + vslot_empty_or_end = \ + _mm256_blendv_epi8(vslot_empty_or_end, _mm256_set1_epi8(BYTENUM), vis_empty); + CMP(vblock_byte7, 7); + CMP(vblock_byte6, 6); + CMP(vblock_byte5, 5); + CMP(vblock_byte4, 4); + CMP(vblock_byte3, 3); + CMP(vblock_byte2, 2); + CMP(vblock_byte1, 1); + CMP(vblock_byte0, 0); +#undef CMP + } + + const int block_id_mask = (1 << log_blocks_) - 1; + + for (int i = 0; i < num_hashes / unroll; ++i) { + __m256i vhash0 = + _mm256_loadu_si256(reinterpret_cast(hashes) + 4 * i + 0); + __m256i vhash1 = + _mm256_loadu_si256(reinterpret_cast(hashes) + 4 * i + 1); + __m256i vhash2 = + _mm256_loadu_si256(reinterpret_cast(hashes) + 4 * i + 2); + __m256i vhash3 = + _mm256_loadu_si256(reinterpret_cast(hashes) + 4 * i + 3); + + // We will get input in byte lanes in the order: [0, 8, 16, 24, 1, 9, 17, 25, 2, 10, + // 18, 26, ...] 
+ vhash0 = _mm256_or_si256(_mm256_srli_epi32(vhash0, 16), + _mm256_and_si256(vhash2, _mm256_set1_epi32(0xffff0000))); + vhash1 = _mm256_or_si256(_mm256_srli_epi32(vhash1, 16), + _mm256_and_si256(vhash3, _mm256_set1_epi32(0xffff0000))); + __m256i vstamp_A = _mm256_and_si256( + _mm256_srlv_epi32(vhash0, _mm256_set1_epi32(16 - log_blocks_ - 7)), + _mm256_set1_epi16(0x7f)); + __m256i vstamp_B = _mm256_and_si256( + _mm256_srlv_epi32(vhash1, _mm256_set1_epi32(16 - log_blocks_ - 7)), + _mm256_set1_epi16(0x7f)); + __m256i vstamp = _mm256_or_si256(vstamp_A, _mm256_slli_epi16(vstamp_B, 8)); + __m256i vblock_id_A = + _mm256_and_si256(_mm256_srlv_epi32(vhash0, _mm256_set1_epi32(16 - log_blocks_)), + _mm256_set1_epi16(block_id_mask)); + __m256i vblock_id_B = + _mm256_and_si256(_mm256_srlv_epi32(vhash1, _mm256_set1_epi32(16 - log_blocks_)), + _mm256_set1_epi16(block_id_mask)); + __m256i vblock_id = _mm256_or_si256(vblock_id_A, _mm256_slli_epi16(vblock_id_B, 8)); + + // Visit all block bytes in reverse order (overwriting data on multiple matches) + __m256i vmatch_found = _mm256_setzero_si256(); + __m256i vslot_id = _mm256_shuffle_epi8(vslot_empty_or_end, vblock_id); + __m256i vgroup_id = _mm256_setzero_si256(); +#define CMP(VBLOCK_BYTE, VGROUPID_BYTE, BYTENUM) \ + { \ + __m256i vcmp = \ + _mm256_cmpeq_epi8(_mm256_shuffle_epi8(VBLOCK_BYTE, vblock_id), vstamp); \ + vmatch_found = _mm256_or_si256(vmatch_found, vcmp); \ + vgroup_id = _mm256_blendv_epi8(vgroup_id, \ + _mm256_shuffle_epi8(VGROUPID_BYTE, vblock_id), vcmp); \ + vslot_id = _mm256_blendv_epi8(vslot_id, _mm256_set1_epi8(BYTENUM + 1), vcmp); \ + } + CMP(vblock_byte7, vgroupid_byte7, 7); + CMP(vblock_byte6, vgroupid_byte6, 6); + CMP(vblock_byte5, vgroupid_byte5, 5); + CMP(vblock_byte4, vgroupid_byte4, 4); + CMP(vblock_byte3, vgroupid_byte3, 3); + CMP(vblock_byte2, vgroupid_byte2, 2); + CMP(vblock_byte1, vgroupid_byte1, 1); + CMP(vblock_byte0, vgroupid_byte0, 0); +#undef CMP + + vslot_id = _mm256_add_epi8(vslot_id, 
_mm256_slli_epi32(vblock_id, 3)); + // So far the output is in the order: [0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, ...] + vmatch_found = _mm256_shuffle_epi8( + vmatch_found, + _mm256_setr_epi32(k4ByteSequence_0_4_8_12, k4ByteSequence_1_5_9_13, + k4ByteSequence_2_6_10_14, k4ByteSequence_3_7_11_15, + k4ByteSequence_0_4_8_12, k4ByteSequence_1_5_9_13, + k4ByteSequence_2_6_10_14, k4ByteSequence_3_7_11_15)); + // Now it is: [0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, | 4, 5, 6, 7, + // 12, 13, 14, 15, ...] + vmatch_found = _mm256_permutevar8x32_epi32(vmatch_found, + _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7)); + + reinterpret_cast(out_match_bitvector)[i] = + _mm256_movemask_epi8(vmatch_found); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 0, + _mm256_and_si256(vgroup_id, _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 1, + _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 8), _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 2, + _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 16), _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 3, + _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 24), _mm256_set1_epi32(0xff))); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 0, + _mm256_and_si256(vslot_id, _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 1, + _mm256_and_si256(_mm256_srli_epi32(vslot_id, 8), _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 2, + _mm256_and_si256(_mm256_srli_epi32(vslot_id, 16), _mm256_set1_epi32(0xff))); + _mm256_storeu_si256( + reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 3, + _mm256_and_si256(_mm256_srli_epi32(vslot_id, 24), _mm256_set1_epi32(0xff))); + } +} + +#endif + +} // namespace compute +} 
// namespace arrow diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc new file mode 100644 index 00000000000..101257f5de8 --- /dev/null +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -0,0 +1,585 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include +#include + +#include "arrow/compute/exec.h" +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/compute/exec/expression.h" +#include "arrow/compute/exec/test_util.h" +#include "arrow/record_batch.h" +#include "arrow/testing/future_util.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/matchers.h" +#include "arrow/testing/random.h" +#include "arrow/util/async_generator.h" +#include "arrow/util/logging.h" +#include "arrow/util/thread_pool.h" +#include "arrow/util/vector.h" + +using testing::ElementsAre; +using testing::HasSubstr; +using testing::Optional; +using testing::UnorderedElementsAreArray; + +namespace arrow { + +namespace compute { + +TEST(ExecPlanConstruction, Empty) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + ASSERT_THAT(plan->Validate(), Raises(StatusCode::Invalid)); +} + +TEST(ExecPlanConstruction, SingleNode) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + auto node = MakeDummyNode(plan.get(), "dummy", /*inputs=*/{}, /*num_outputs=*/0); + ASSERT_OK(plan->Validate()); + ASSERT_THAT(plan->sources(), ElementsAre(node)); + ASSERT_THAT(plan->sinks(), ElementsAre(node)); + + ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make()); + node = MakeDummyNode(plan.get(), "dummy", /*inputs=*/{}, /*num_outputs=*/1); + // Output not bound + ASSERT_THAT(plan->Validate(), Raises(StatusCode::Invalid)); +} + +TEST(ExecPlanConstruction, SourceSink) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + auto source = MakeDummyNode(plan.get(), "source", /*inputs=*/{}, /*num_outputs=*/1); + auto sink = MakeDummyNode(plan.get(), "sink", /*inputs=*/{source}, /*num_outputs=*/0); + + ASSERT_OK(plan->Validate()); + EXPECT_THAT(plan->sources(), ElementsAre(source)); + EXPECT_THAT(plan->sinks(), ElementsAre(sink)); +} + +TEST(ExecPlanConstruction, MultipleNode) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto source1 = MakeDummyNode(plan.get(), "source1", /*inputs=*/{}, /*num_outputs=*/2); + + auto 
source2 = MakeDummyNode(plan.get(), "source2", /*inputs=*/{}, /*num_outputs=*/1); + + auto process1 = + MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1}, /*num_outputs=*/2); + + auto process2 = MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1, source2}, + /*num_outputs=*/1); + + auto process3 = + MakeDummyNode(plan.get(), "process3", /*inputs=*/{process1, process2, process1}, + /*num_outputs=*/1); + + auto sink = MakeDummyNode(plan.get(), "sink", /*inputs=*/{process3}, /*num_outputs=*/0); + + ASSERT_OK(plan->Validate()); + ASSERT_THAT(plan->sources(), ElementsAre(source1, source2)); + ASSERT_THAT(plan->sinks(), ElementsAre(sink)); +} + +struct StartStopTracker { + std::vector started, stopped; + + StartProducingFunc start_producing_func(Status st = Status::OK()) { + return [this, st](ExecNode* node) { + started.push_back(node->label()); + return st; + }; + } + + StopProducingFunc stop_producing_func() { + return [this](ExecNode* node) { stopped.push_back(node->label()); }; + } +}; + +TEST(ExecPlan, DummyStartProducing) { + StartStopTracker t; + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto source1 = MakeDummyNode(plan.get(), "source1", /*inputs=*/{}, /*num_outputs=*/2, + t.start_producing_func(), t.stop_producing_func()); + + auto source2 = MakeDummyNode(plan.get(), "source2", /*inputs=*/{}, /*num_outputs=*/1, + t.start_producing_func(), t.stop_producing_func()); + + auto process1 = + MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1}, /*num_outputs=*/2, + t.start_producing_func(), t.stop_producing_func()); + + auto process2 = + MakeDummyNode(plan.get(), "process2", /*inputs=*/{process1, source2}, + /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func()); + + auto process3 = + MakeDummyNode(plan.get(), "process3", /*inputs=*/{process1, source1, process2}, + /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func()); + + MakeDummyNode(plan.get(), "sink", /*inputs=*/{process3}, /*num_outputs=*/0, + 
t.start_producing_func(), t.stop_producing_func()); + + ASSERT_OK(plan->Validate()); + ASSERT_EQ(t.started.size(), 0); + ASSERT_EQ(t.stopped.size(), 0); + + ASSERT_OK(plan->StartProducing()); + // Note that any correct reverse topological order may do + ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1", + "source2", "source1")); + + plan->StopProducing(); + ASSERT_THAT(plan->finished(), Finishes(Ok())); + // Note that any correct topological order may do + ASSERT_THAT(t.stopped, ElementsAre("source1", "source2", "process1", "process2", + "process3", "sink")); + + ASSERT_THAT(plan->StartProducing(), + Raises(StatusCode::Invalid, HasSubstr("restarted"))); +} + +TEST(ExecPlan, DummyStartProducingError) { + StartStopTracker t; + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + auto source1 = MakeDummyNode( + plan.get(), "source1", /*num_inputs=*/{}, /*num_outputs=*/2, + t.start_producing_func(Status::NotImplemented("zzz")), t.stop_producing_func()); + + auto source2 = + MakeDummyNode(plan.get(), "source2", /*num_inputs=*/{}, /*num_outputs=*/1, + t.start_producing_func(), t.stop_producing_func()); + + auto process1 = MakeDummyNode( + plan.get(), "process1", /*num_inputs=*/{source1}, /*num_outputs=*/2, + t.start_producing_func(Status::IOError("xxx")), t.stop_producing_func()); + + auto process2 = + MakeDummyNode(plan.get(), "process2", /*num_inputs=*/{process1, source2}, + /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func()); + + auto process3 = + MakeDummyNode(plan.get(), "process3", /*num_inputs=*/{process1, source1, process2}, + /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func()); + + MakeDummyNode(plan.get(), "sink", /*num_inputs=*/{process3}, /*num_outputs=*/0, + t.start_producing_func(), t.stop_producing_func()); + + ASSERT_OK(plan->Validate()); + ASSERT_EQ(t.started.size(), 0); + ASSERT_EQ(t.stopped.size(), 0); + + // `process1` raises IOError + ASSERT_THAT(plan->StartProducing(), 
Raises(StatusCode::IOError)); + ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1")); + // Nodes that started successfully were stopped in reverse order + ASSERT_THAT(t.stopped, ElementsAre("process2", "process3", "sink")); +} + +namespace { + +struct BatchesWithSchema { + std::vector batches; + std::shared_ptr schema; +}; + +Result MakeTestSourceNode(ExecPlan* plan, std::string label, + BatchesWithSchema batches_with_schema, bool parallel, + bool slow) { + DCHECK_GT(batches_with_schema.batches.size(), 0); + + auto opt_batches = ::arrow::internal::MapVector( + [](ExecBatch batch) { return util::make_optional(std::move(batch)); }, + std::move(batches_with_schema.batches)); + + AsyncGenerator> gen; + + if (parallel) { + // emulate batches completing initial decode-after-scan on a cpu thread + ARROW_ASSIGN_OR_RAISE( + gen, MakeBackgroundGenerator(MakeVectorIterator(std::move(opt_batches)), + ::arrow::internal::GetCpuThreadPool())); + + // ensure that callbacks are not executed immediately on a background thread + gen = MakeTransferredGenerator(std::move(gen), ::arrow::internal::GetCpuThreadPool()); + } else { + gen = MakeVectorGenerator(std::move(opt_batches)); + } + + if (slow) { + gen = MakeMappedGenerator(std::move(gen), [](const util::optional& batch) { + SleepABit(); + return batch; + }); + } + + return MakeSourceNode(plan, label, std::move(batches_with_schema.schema), + std::move(gen)); +} + +Future> StartAndCollect( + ExecPlan* plan, AsyncGenerator> gen) { + RETURN_NOT_OK(plan->Validate()); + RETURN_NOT_OK(plan->StartProducing()); + + auto collected_fut = CollectAsyncGenerator(gen); + + return AllComplete({plan->finished(), Future<>(collected_fut)}) + .Then([collected_fut]() -> Result> { + ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result()); + return ::arrow::internal::MapVector( + [](util::optional batch) { return std::move(*batch); }, + std::move(collected)); + }); +} + +BatchesWithSchema MakeBasicBatches() { + 
BatchesWithSchema out; + out.batches = { + ExecBatchFromJSON({int32(), boolean()}, "[[null, true], [4, false]]"), + ExecBatchFromJSON({int32(), boolean()}, "[[5, null], [6, false], [7, false]]")}; + out.schema = schema({field("i32", int32()), field("bool", boolean())}); + return out; +} + +BatchesWithSchema MakeRandomBatches(const std::shared_ptr& schema, + int num_batches = 10, int batch_size = 4) { + BatchesWithSchema out; + + random::RandomArrayGenerator rng(42); + out.batches.resize(num_batches); + + for (int i = 0; i < num_batches; ++i) { + out.batches[i] = ExecBatch(*rng.BatchOf(schema->fields(), batch_size)); + // add a tag scalar to ensure the batches are unique + out.batches[i].values.emplace_back(i); + } + return out; +} +} // namespace + +TEST(ExecPlanExecution, SourceSink) { + for (bool slow : {false, true}) { + SCOPED_TRACE(slow ? "slowed" : "unslowed"); + + for (bool parallel : {false, true}) { + SCOPED_TRACE(parallel ? "parallel" : "single threaded"); + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto basic_data = MakeBasicBatches(); + + ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source", + basic_data, parallel, slow)); + + auto sink_gen = MakeSinkNode(source, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray(basic_data.batches)))); + } + } +} + +TEST(ExecPlanExecution, SourceSinkError) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto basic_data = MakeBasicBatches(); + auto it = basic_data.batches.begin(); + AsyncGenerator> gen = + [&]() -> Result> { + if (it == basic_data.batches.end()) { + return Status::Invalid("Artificial error"); + } + return util::make_optional(*it++); + }; + + auto source = MakeSourceNode(plan.get(), "source", {}, gen); + auto sink_gen = MakeSinkNode(source, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(Raises(StatusCode::Invalid, HasSubstr("Artificial")))); +} + +TEST(ExecPlanExecution, 
StressSourceSink) { + for (bool slow : {false, true}) { + SCOPED_TRACE(slow ? "slowed" : "unslowed"); + + for (bool parallel : {false, true}) { + SCOPED_TRACE(parallel ? "parallel" : "single threaded"); + + int num_batches = slow && !parallel ? 30 : 300; + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto random_data = MakeRandomBatches( + schema({field("a", int32()), field("b", boolean())}), num_batches); + + ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source", + random_data, parallel, slow)); + + auto sink_gen = MakeSinkNode(source, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray(random_data.batches)))); + } + } +} + +TEST(ExecPlanExecution, StressSourceSinkStopped) { + for (bool slow : {false, true}) { + SCOPED_TRACE(slow ? "slowed" : "unslowed"); + + for (bool parallel : {false, true}) { + SCOPED_TRACE(parallel ? "parallel" : "single threaded"); + + int num_batches = slow && !parallel ? 
30 : 300; + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto random_data = MakeRandomBatches( + schema({field("a", int32()), field("b", boolean())}), num_batches); + + ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source", + random_data, parallel, slow)); + + auto sink_gen = MakeSinkNode(source, "sink"); + + ASSERT_OK(plan->Validate()); + ASSERT_OK(plan->StartProducing()); + + EXPECT_THAT(sink_gen(), Finishes(ResultWith(Optional(random_data.batches[0])))); + + plan->StopProducing(); + ASSERT_THAT(plan->finished(), Finishes(Ok())); + } + } +} + +TEST(ExecPlanExecution, SourceFilterSink) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto basic_data = MakeBasicBatches(); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", basic_data, + /*parallel=*/false, /*slow=*/false)); + + ASSERT_OK_AND_ASSIGN( + auto filter, MakeFilterNode(source, "filter", equal(field_ref("i32"), literal(6)))); + + auto sink_gen = MakeSinkNode(filter, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray( + {ExecBatchFromJSON({int32(), boolean()}, "[]"), + ExecBatchFromJSON({int32(), boolean()}, "[[6, false]]")})))); +} + +TEST(ExecPlanExecution, SourceProjectSink) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto basic_data = MakeBasicBatches(); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", basic_data, + /*parallel=*/false, /*slow=*/false)); + + std::vector exprs{ + not_(field_ref("bool")), + call("add", {field_ref("i32"), literal(1)}), + }; + for (auto& expr : exprs) { + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*basic_data.schema)); + } + + ASSERT_OK_AND_ASSIGN(auto projection, + MakeProjectNode(source, "project", exprs, {"!bool", "i32 + 1"})); + + auto sink_gen = MakeSinkNode(projection, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray( + 
{ExecBatchFromJSON({boolean(), int32()}, "[[false, null], [true, 5]]"), + ExecBatchFromJSON({boolean(), int32()}, + "[[null, 6], [true, 7], [true, 8]]")})))); +} + +namespace { + +BatchesWithSchema MakeGroupableBatches(int multiplicity = 1) { + BatchesWithSchema out; + + out.batches = {ExecBatchFromJSON({int32(), utf8()}, R"([ + [12, "alfa"], + [7, "beta"], + [3, "alfa"] + ])"), + ExecBatchFromJSON({int32(), utf8()}, R"([ + [-2, "alfa"], + [-1, "gama"], + [3, "alfa"] + ])"), + ExecBatchFromJSON({int32(), utf8()}, R"([ + [5, "gama"], + [3, "beta"], + [-8, "alfa"] + ])")}; + + size_t batch_count = out.batches.size(); + for (int repeat = 1; repeat < multiplicity; ++repeat) { + for (size_t i = 0; i < batch_count; ++i) { + out.batches.push_back(out.batches[i]); + } + } + + out.schema = schema({field("i32", int32()), field("str", utf8())}); + + return out; +} +} // namespace + +TEST(ExecPlanExecution, SourceGroupedSum) { + for (bool parallel : {false, true}) { + SCOPED_TRACE(parallel ? "parallel/merged" : "serial"); + + auto input = MakeGroupableBatches(/*multiplicity=*/parallel ? 100 : 1); + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", input, + /*parallel=*/parallel, /*slow=*/false)); + ASSERT_OK_AND_ASSIGN( + auto gby, MakeGroupByNode(source, "gby", /*keys=*/{"str"}, /*targets=*/{"i32"}, + {{"hash_sum", nullptr}})); + auto sink_gen = MakeSinkNode(gby, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ExecBatchFromJSON( + {int64(), utf8()}, + parallel ? R"([[800, "alfa"], [1000, "beta"], [400, "gama"]])" + : R"([[8, "alfa"], [10, "beta"], [4, "gama"]])")})))); + } +} + +TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) { + for (bool parallel : {false, true}) { + SCOPED_TRACE(parallel ? "parallel/merged" : "serial"); + + int batch_multiplicity = parallel ? 
100 : 1; + auto input = MakeGroupableBatches(/*multiplicity=*/batch_multiplicity); + + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", input, + /*parallel=*/parallel, /*slow=*/false)); + ASSERT_OK_AND_ASSIGN( + auto filter, + MakeFilterNode(source, "filter", greater_equal(field_ref("i32"), literal(0)))); + + ASSERT_OK_AND_ASSIGN( + auto projection, + MakeProjectNode(filter, "project", + { + field_ref("str"), + call("multiply", {field_ref("i32"), literal(2)}), + })); + + ASSERT_OK_AND_ASSIGN(auto gby, MakeGroupByNode(projection, "gby", /*keys=*/{"str"}, + /*targets=*/{"multiply(i32, 2)"}, + {{"hash_sum", nullptr}})); + + ASSERT_OK_AND_ASSIGN( + auto having, + MakeFilterNode(gby, "having", + greater(field_ref("hash_sum"), literal(10 * batch_multiplicity)))); + + auto sink_gen = MakeSinkNode(having, "sink"); + + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ExecBatchFromJSON( + {int64(), utf8()}, parallel ? 
R"([[3600, "alfa"], [2000, "beta"]])" + : R"([[36, "alfa"], [20, "beta"]])")})))); + } +} + +TEST(ExecPlanExecution, SourceScalarAggSink) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + auto basic_data = MakeBasicBatches(); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", basic_data, + /*parallel=*/false, /*slow=*/false)); + + ASSERT_OK_AND_ASSIGN( + auto scalar_agg, + MakeScalarAggregateNode(source, "scalar_agg", {{"sum", nullptr}, {"any", nullptr}}, + /*targets=*/{"i32", "bool"}, + /*out_field_names=*/{"sum(i32)", "any(bool)"})); + + auto sink_gen = MakeSinkNode(scalar_agg, "sink"); + + ASSERT_THAT( + StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ + ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(boolean())}, + "[[22, true]]"), + })))); +} + +TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { + ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); + + BatchesWithSchema basic_data; + basic_data.batches = { + ExecBatchFromJSON({ValueDescr::Scalar(int32()), ValueDescr::Scalar(int32()), + ValueDescr::Scalar(int32())}, + "[[5, 5, 5], [5, 5, 5], [5, 5, 5]]"), + ExecBatchFromJSON({int32(), int32(), int32()}, + "[[5, 5, 5], [6, 6, 6], [7, 7, 7]]")}; + basic_data.schema = + schema({field("a", int32()), field("b", int32()), field("c", int32())}); + + ASSERT_OK_AND_ASSIGN(auto source, + MakeTestSourceNode(plan.get(), "source", basic_data, + /*parallel=*/false, /*slow=*/false)); + + ASSERT_OK_AND_ASSIGN( + auto scalar_agg, + MakeScalarAggregateNode(source, "scalar_agg", + {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}}, + {"a", "b", "c"}, {"sum a", "sum b", "sum c"})); + + auto sink_gen = MakeSinkNode(scalar_agg, "sink"); + + ASSERT_THAT( + StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ + ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(int64()), + ValueDescr::Scalar(float64())}, + "[[6, 33, 5.5]]"), + 
})))); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/subtree_internal.h b/cpp/src/arrow/compute/exec/subtree_internal.h new file mode 100644 index 00000000000..72d419df225 --- /dev/null +++ b/cpp/src/arrow/compute/exec/subtree_internal.h @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "arrow/compute/exec/expression.h" +#include "arrow/util/optional.h" + +namespace arrow { +namespace compute { +// Helper class for efficiently detecting subtrees given expressions. +// +// Using fragment partition expressions as an example: +// Partition expressions are broken into conjunction members and each member dictionary +// encoded to impose a sortable ordering. In addition, subtrees are generated which span +// groups of fragments and nested subtrees. After encoding each fragment is guaranteed to +// be a descendant of at least one subtree. 
For example, given fragments in a +// HivePartitioning with paths: +// +// /num=0/al=eh/dat.par +// /num=0/al=be/dat.par +// /num=1/al=eh/dat.par +// /num=1/al=be/dat.par +// +// The following subtrees will be introduced: +// +// /num=0/ +// /num=0/al=eh/ +// /num=0/al=eh/dat.par +// /num=0/al=be/ +// /num=0/al=be/dat.par +// /num=1/ +// /num=1/al=eh/ +// /num=1/al=eh/dat.par +// /num=1/al=be/ +// /num=1/al=be/dat.par +struct SubtreeImpl { + // Each unique conjunction member is mapped to an integer. + using expression_code = char32_t; + // Partition expressions are mapped to strings of codes; strings give us lexicographic + // ordering (and potentially useful optimizations). + using expression_codes = std::basic_string; + // An encoded guarantee (if index is set) or subtree. + struct Encoded { + // An external index identifying the corresponding object (e.g. a Fragment) of the + // guarantee. + util::optional index; + // An encoded expression representing a guarantee. + expression_codes guarantee; + }; + + std::unordered_map + expr_to_code_; + std::vector code_to_expr_; + std::unordered_set subtree_exprs_; + + // Encode a subexpression (returning the existing code if possible). + expression_code GetOrInsert(const compute::Expression& expr) { + auto next_code = static_cast(expr_to_code_.size()); + auto it_success = expr_to_code_.emplace(expr, next_code); + + if (it_success.second) { + code_to_expr_.push_back(expr); + } + return it_success.first->second; + } + + // Encode an expression (recursively breaking up conjunction members if possible). 
+ void EncodeConjunctionMembers(const compute::Expression& expr, + expression_codes* codes) { + if (auto call = expr.call()) { + if (call->function_name == "and_kleene") { + // expr is a conjunction, encode its arguments + EncodeConjunctionMembers(call->arguments[0], codes); + EncodeConjunctionMembers(call->arguments[1], codes); + return; + } + } + // expr is not a conjunction, encode it whole + codes->push_back(GetOrInsert(expr)); + } + + // Convert an encoded subtree or guarantee back into an expression. + compute::Expression GetSubtreeExpression(const Encoded& encoded_subtree) { + // Filters will already be simplified by all of a subtree's ancestors, so + // we only need to simplify the filter by the trailing conjunction member + // of each subtree. + return code_to_expr_[encoded_subtree.guarantee.back()]; + } + + // Insert subtrees for each component of an encoded partition expression. + void GenerateSubtrees(expression_codes guarantee, std::vector* encoded) { + while (!guarantee.empty()) { + if (subtree_exprs_.insert(guarantee).second) { + Encoded encoded_subtree{/*index=*/util::nullopt, guarantee}; + encoded->push_back(std::move(encoded_subtree)); + } + guarantee.resize(guarantee.size() - 1); + } + } + + // Encode a guarantee, and generate subtrees for it as well. 
+ void EncodeOneGuarantee(int index, const Expression& guarantee, + std::vector* encoded) { + Encoded encoded_guarantee{index, {}}; + EncodeConjunctionMembers(guarantee, &encoded_guarantee.guarantee); + GenerateSubtrees(encoded_guarantee.guarantee, encoded); + encoded->push_back(std::move(encoded_guarantee)); + } + + template + std::vector EncodeGuarantees(const GetGuarantee& get, int count) { + std::vector encoded; + for (int i = 0; i < count; ++i) { + EncodeOneGuarantee(i, get(i), &encoded); + } + return encoded; + } + + // Comparator for sort + struct ByGuarantee { + bool operator()(const Encoded& l, const Encoded& r) { + const auto cmp = l.guarantee.compare(r.guarantee); + if (cmp != 0) { + return cmp < 0; + } + // Equal guarantees; sort encodings with indices after encodings without + return (l.index ? 1 : 0) < (r.index ? 1 : 0); + } + }; + + // Comparator for building a Forest + struct IsAncestor { + const std::vector encoded; + + bool operator()(int l, int r) const { + if (encoded[l].index) { + // Leaf-level object (e.g. a Fragment): not an ancestor. + return false; + } + + const auto& ancestor = encoded[l].guarantee; + const auto& descendant = encoded[r].guarantee; + + if (descendant.size() >= ancestor.size()) { + return std::equal(ancestor.begin(), ancestor.end(), descendant.begin()); + } + return false; + } + }; +}; + +inline bool operator==(const SubtreeImpl::Encoded& l, const SubtreeImpl::Encoded& r) { + return l.index == r.index && l.guarantee == r.guarantee; +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/subtree_test.cc b/cpp/src/arrow/compute/exec/subtree_test.cc new file mode 100644 index 00000000000..97213104454 --- /dev/null +++ b/cpp/src/arrow/compute/exec/subtree_test.cc @@ -0,0 +1,377 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include +#include + +#include "arrow/compute/exec/forest_internal.h" +#include "arrow/compute/exec/subtree_internal.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/string_view.h" + +namespace arrow { +namespace compute { + +using testing::ContainerEq; + +// Tests of subtree pruning + +// Don't depend on FileSystem - port just enough to be useful here +struct FileInfo { + bool is_dir; + std::string path; + + bool operator==(const FileInfo& other) const { + return is_dir == other.is_dir && path == other.path; + } + + static FileInfo Dir(std::string path) { return FileInfo{true, std::move(path)}; } + + static FileInfo File(std::string path) { return FileInfo{false, std::move(path)}; } + + static bool ByPath(const FileInfo& l, const FileInfo& r) { return l.path < r.path; } +}; + +struct TestPathTree { + FileInfo info; + std::vector subtrees; + + explicit TestPathTree(std::string file_path) + : info(FileInfo::File(std::move(file_path))) {} + + TestPathTree(std::string dir_path, std::vector subtrees) + : info(FileInfo::Dir(std::move(dir_path))), subtrees(std::move(subtrees)) {} + + TestPathTree(Forest::Ref ref, const std::vector& infos) : info(infos[ref.i]) { + const Forest& forest = *ref.forest; + + int begin = ref.i + 
1; + int end = begin + ref.num_descendants(); + + for (int i = begin; i < end; ++i) { + subtrees.emplace_back(forest[i], infos); + i += forest[i].num_descendants(); + } + } + + bool operator==(const TestPathTree& other) const { + return info == other.info && subtrees == other.subtrees; + } + + std::string ToString() const { + auto out = "\n" + info.path; + if (info.is_dir) out += "/"; + + for (const auto& subtree : subtrees) { + out += subtree.ToString(); + } + return out; + } + + friend std::ostream& operator<<(std::ostream& os, const TestPathTree& tree) { + return os << tree.ToString(); + } +}; + +using PT = TestPathTree; + +util::string_view RemoveTrailingSlash(util::string_view key) { + while (!key.empty() && key.back() == '/') { + key.remove_suffix(1); + } + return key; +} +bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) { + // See filesystem/path_util.h + ancestor = RemoveTrailingSlash(ancestor); + if (ancestor == "") return true; + descendant = RemoveTrailingSlash(descendant); + if (!descendant.starts_with(ancestor)) return false; + descendant.remove_prefix(ancestor.size()); + if (descendant.empty()) return true; + return descendant.front() == '/'; +} + +Forest MakeForest(std::vector* infos) { + std::sort(infos->begin(), infos->end(), FileInfo::ByPath); + + return Forest(static_cast(infos->size()), [&](int i, int j) { + return IsAncestorOf(infos->at(i).path, infos->at(j).path); + }); +} + +void ExpectForestIs(std::vector infos, std::vector expected_roots) { + auto forest = MakeForest(&infos); + + std::vector actual_roots; + ASSERT_OK(forest.Visit( + [&](Forest::Ref ref) -> Result { + actual_roots.emplace_back(ref, infos); + return false; // only vist roots + }, + [](Forest::Ref) {})); + + // visit expected and assert equality + EXPECT_THAT(actual_roots, ContainerEq(expected_roots)); +} + +TEST(Forest, Basic) { + ExpectForestIs({}, {}); + + ExpectForestIs({FileInfo::File("aa")}, {PT("aa")}); + ExpectForestIs({FileInfo::Dir("AA")}, 
{PT("AA", {})}); + ExpectForestIs({FileInfo::Dir("AA"), FileInfo::File("AA/aa")}, + {PT("AA", {PT("AA/aa")})}); + ExpectForestIs({FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0")}, + {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})})}); + + // Missing parent can still find ancestor. + ExpectForestIs({FileInfo::Dir("AA"), FileInfo::File("AA/BB/bb")}, + {PT("AA", {PT("AA/BB/bb")})}); + + // Ancestors should link to parent regardless of ordering. + ExpectForestIs({FileInfo::File("AA/aa"), FileInfo::Dir("AA")}, + {PT("AA", {PT("AA/aa")})}); + + // Multiple roots are supported. + ExpectForestIs({FileInfo::File("aa"), FileInfo::File("bb")}, {PT("aa"), PT("bb")}); + ExpectForestIs({FileInfo::File("00"), FileInfo::Dir("AA"), FileInfo::File("AA/aa"), + FileInfo::File("BB/bb")}, + {PT("00"), PT("AA", {PT("AA/aa")}), PT("BB/bb")}); + ExpectForestIs({FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0"), + FileInfo::Dir("CC"), FileInfo::Dir("CC/BB"), FileInfo::File("CC/BB/0")}, + {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})}), + PT("CC", {PT("CC/BB", {PT("CC/BB/0")})})}); +} + +TEST(Forest, HourlyETL) { + // This test mimics a scenario where an ETL dumps hourly files in a structure + // `$year/$month/$day/$hour/*.parquet`. 
+ constexpr int64_t kYears = 3; + constexpr int64_t kMonthsPerYear = 12; + constexpr int64_t kDaysPerMonth = 31; + constexpr int64_t kHoursPerDay = 24; + constexpr int64_t kFilesPerHour = 2; + + // Avoid constructing strings + std::vector numbers{kDaysPerMonth + 1}; + for (size_t i = 0; i < numbers.size(); i++) { + numbers[i] = std::to_string(i); + if (numbers[i].size() == 1) { + numbers[i] = "0" + numbers[i]; + } + } + + auto join = [](const std::vector& path) { + if (path.empty()) return std::string(""); + std::string result = path[0]; + for (const auto& part : path) { + result += '/'; + result += part; + } + return result; + }; + + std::vector infos; + + std::vector forest; + for (int64_t year = 0; year < kYears; year++) { + auto year_str = std::to_string(year + 2000); + auto year_dir = FileInfo::Dir(year_str); + infos.push_back(year_dir); + + std::vector months; + for (int64_t month = 0; month < kMonthsPerYear; month++) { + auto month_str = join({year_str, numbers[month + 1]}); + auto month_dir = FileInfo::Dir(month_str); + infos.push_back(month_dir); + + std::vector days; + for (int64_t day = 0; day < kDaysPerMonth; day++) { + auto day_str = join({month_str, numbers[day + 1]}); + auto day_dir = FileInfo::Dir(day_str); + infos.push_back(day_dir); + + std::vector hours; + for (int64_t hour = 0; hour < kHoursPerDay; hour++) { + auto hour_str = join({day_str, numbers[hour]}); + auto hour_dir = FileInfo::Dir(hour_str); + infos.push_back(hour_dir); + + std::vector files; + for (int64_t file = 0; file < kFilesPerHour; file++) { + auto file_str = join({hour_str, numbers[file] + ".parquet"}); + auto file_fd = FileInfo::File(file_str); + infos.push_back(file_fd); + files.emplace_back(file_str); + } + + auto hour_pt = PT(hour_str, std::move(files)); + hours.push_back(hour_pt); + } + + auto day_pt = PT(day_str, std::move(hours)); + days.push_back(day_pt); + } + + auto month_pt = PT(month_str, std::move(days)); + months.push_back(month_pt); + } + + auto year_pt = 
PT(year_str, std::move(months)); + forest.push_back(year_pt); + } + + ExpectForestIs(infos, forest); +} + +TEST(Forest, Visit) { + using Infos = std::vector; + + for (auto infos : + {Infos{}, Infos{FileInfo::Dir("A"), FileInfo::File("A/a")}, + Infos{FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0"), + FileInfo::Dir("CC"), FileInfo::Dir("CC/BB"), FileInfo::File("CC/BB/0")}}) { + ASSERT_TRUE(std::is_sorted(infos.begin(), infos.end(), FileInfo::ByPath)); + + auto forest = MakeForest(&infos); + + auto ignore_post = [](Forest::Ref) {}; + + // noop is fine + ASSERT_OK( + forest.Visit([](Forest::Ref) -> Result { return false; }, ignore_post)); + + // Should propagate failure + if (forest.size() != 0) { + ASSERT_RAISES( + Invalid, + forest.Visit([](Forest::Ref) -> Result { return Status::Invalid(""); }, + ignore_post)); + } + + // Ensure basic visit of all nodes + int i = 0; + ASSERT_OK(forest.Visit( + [&](Forest::Ref ref) -> Result { + EXPECT_EQ(ref.i, i); + ++i; + return true; + }, + ignore_post)); + + // Visit only directories + Infos actual_dirs; + ASSERT_OK(forest.Visit( + [&](Forest::Ref ref) -> Result { + if (!infos[ref.i].is_dir) { + return false; + } + actual_dirs.push_back(infos[ref.i]); + return true; + }, + ignore_post)); + + Infos expected_dirs; + for (const auto& info : infos) { + if (info.is_dir) { + expected_dirs.push_back(info); + } + } + EXPECT_THAT(actual_dirs, ContainerEq(expected_dirs)); + } +} + +TEST(Subtree, EncodeExpression) { + SubtreeImpl tree; + ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1")))); + // Should be idempotent + ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1")))); + ASSERT_EQ(equal(field_ref("a"), literal("1")), tree.code_to_expr_[0]); + + SubtreeImpl::expression_codes codes; + auto conj = + and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2"))); + tree.EncodeConjunctionMembers(conj, &codes); + ASSERT_EQ(SubtreeImpl::expression_codes({0, 1}), codes); + + 
codes.clear(); + conj = or_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2"))); + tree.EncodeConjunctionMembers(conj, &codes); + ASSERT_EQ(SubtreeImpl::expression_codes({2}), codes); +} + +TEST(Subtree, GetSubtreeExpression) { + SubtreeImpl tree; + const auto expr_a = equal(field_ref("a"), literal("1")); + const auto expr_b = equal(field_ref("b"), literal("2")); + const auto code_a = tree.GetOrInsert(expr_a); + const auto code_b = tree.GetOrInsert(expr_b); + ASSERT_EQ(expr_a, + tree.GetSubtreeExpression(SubtreeImpl::Encoded{util::nullopt, {code_a}})); + ASSERT_EQ(expr_b, tree.GetSubtreeExpression( + SubtreeImpl::Encoded{util::nullopt, {code_a, code_b}})); +} + +class FakeFragment { + public: + explicit FakeFragment(Expression partition_expression) + : partition_expression_(partition_expression) {} + const Expression& partition_expression() const { return partition_expression_; } + + private: + Expression partition_expression_; +}; + +TEST(Subtree, EncodeFragments) { + const auto expr_a = + and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2"))); + const auto expr_b = + and_(equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3"))); + std::vector> fragments; + fragments.push_back(std::make_shared(expr_a)); + fragments.push_back(std::make_shared(expr_b)); + + SubtreeImpl tree; + auto encoded = tree.EncodeGuarantees( + [&](int index) { return fragments[index]->partition_expression(); }, + static_cast(fragments.size())); + EXPECT_THAT( + tree.code_to_expr_, + ContainerEq(std::vector{ + equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")), + equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3"))})); + EXPECT_THAT( + encoded, + testing::UnorderedElementsAreArray({ + SubtreeImpl::Encoded{util::make_optional(0), + SubtreeImpl::expression_codes({0, 1})}, + SubtreeImpl::Encoded{util::make_optional(1), + SubtreeImpl::expression_codes({2, 3})}, + 
SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0})}, + SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2})}, + SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0, 1})}, + SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2, 3})}, + })); +} +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc new file mode 100644 index 00000000000..b47d6087c0b --- /dev/null +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/exec/test_util.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/compute/exec.h" +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/datum.h" +#include "arrow/record_batch.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/type.h" +#include "arrow/util/async_generator.h" +#include "arrow/util/iterator.h" +#include "arrow/util/logging.h" +#include "arrow/util/optional.h" +#include "arrow/util/vector.h" + +namespace arrow { + +using internal::Executor; + +namespace compute { +namespace { + +struct DummyNode : ExecNode { + DummyNode(ExecPlan* plan, std::string label, NodeVector inputs, int num_outputs, + StartProducingFunc start_producing, StopProducingFunc stop_producing) + : ExecNode(plan, std::move(label), std::move(inputs), {}, dummy_schema(), + num_outputs), + start_producing_(std::move(start_producing)), + stop_producing_(std::move(stop_producing)) { + input_labels_.resize(inputs_.size()); + for (size_t i = 0; i < input_labels_.size(); ++i) { + input_labels_[i] = std::to_string(i); + } + } + + const char* kind_name() override { return "Dummy"; } + + void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {} + + void ErrorReceived(ExecNode* input, Status error) override {} + + void InputFinished(ExecNode* input, int seq_stop) override {} + + Status StartProducing() override { + if (start_producing_) { + RETURN_NOT_OK(start_producing_(this)); + } + started_ = true; + return Status::OK(); + } + + void PauseProducing(ExecNode* output) override { + ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure"; + AssertIsOutput(output); + } + + void ResumeProducing(ExecNode* output) override { + ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure"; + AssertIsOutput(output); + } + + void StopProducing(ExecNode* output) override { + EXPECT_GE(num_outputs(), 0) << "Sink nodes should 
not experience backpressure"; + AssertIsOutput(output); + } + + void StopProducing() override { + if (started_) { + for (const auto& input : inputs_) { + input->StopProducing(this); + } + if (stop_producing_) { + stop_producing_(this); + } + } + } + + Future<> finished() override { return Future<>::MakeFinished(); } + + private: + void AssertIsOutput(ExecNode* output) { + auto it = std::find(outputs_.begin(), outputs_.end(), output); + ASSERT_NE(it, outputs_.end()); + } + + std::shared_ptr dummy_schema() const { + return schema({field("dummy", null())}); + } + + StartProducingFunc start_producing_; + StopProducingFunc stop_producing_; + std::unordered_set requested_stop_; + bool started_ = false; +}; + +} // namespace + +ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector inputs, + int num_outputs, StartProducingFunc start_producing, + StopProducingFunc stop_producing) { + return plan->EmplaceNode(plan, std::move(label), std::move(inputs), + num_outputs, std::move(start_producing), + std::move(stop_producing)); +} + +ExecBatch ExecBatchFromJSON(const std::vector& descrs, + util::string_view json) { + auto fields = ::arrow::internal::MapVector( + [](const ValueDescr& descr) { return field("", descr.type); }, descrs); + + ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; + + auto value_it = batch.values.begin(); + for (const auto& descr : descrs) { + if (descr.shape == ValueDescr::SCALAR) { + if (batch.length == 0) { + *value_it = MakeNullScalar(value_it->type()); + } else { + *value_it = value_it->make_array()->GetScalar(0).ValueOrDie(); + } + } + ++value_it; + } + + return batch; +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h new file mode 100644 index 00000000000..faa395bab78 --- /dev/null +++ b/cpp/src/arrow/compute/exec/test_util.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/compute/exec.h" +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/testing/visibility.h" +#include "arrow/util/string_view.h" + +namespace arrow { +namespace compute { + +using StartProducingFunc = std::function; +using StopProducingFunc = std::function; + +// Make a dummy node that has no execution behaviour +ARROW_TESTING_EXPORT +ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector inputs, + int num_outputs, StartProducingFunc = {}, StopProducingFunc = {}); + +ARROW_TESTING_EXPORT +ExecBatch ExecBatchFromJSON(const std::vector& descrs, + util::string_view json); + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/util.cc b/cpp/src/arrow/compute/exec/util.cc new file mode 100644 index 00000000000..a44676c2f0d --- /dev/null +++ b/cpp/src/arrow/compute/exec/util.cc @@ -0,0 +1,278 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/exec/util.h" + +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" +#include "arrow/util/ubsan.h" + +namespace arrow { + +using BitUtil::CountTrailingZeros; + +namespace util { + +inline void BitUtil::bits_to_indexes_helper(uint64_t word, uint16_t base_index, + int* num_indexes, uint16_t* indexes) { + int n = *num_indexes; + while (word) { + indexes[n++] = base_index + static_cast(CountTrailingZeros(word)); + word &= word - 1; + } + *num_indexes = n; +} + +inline void BitUtil::bits_filter_indexes_helper(uint64_t word, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes) { + int n = *num_indexes; + while (word) { + indexes[n++] = input_indexes[CountTrailingZeros(word)]; + word &= word - 1; + } + *num_indexes = n; +} + +template +void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes) { + // 64 bits at a time + constexpr int unroll = 64; + int tail = num_bits % unroll; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + if (filter_input_indexes) { + bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes, + num_indexes, indexes); + } else { + bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes); + } + } else { +#endif + 
*num_indexes = 0; + for (int i = 0; i < num_bits / unroll; ++i) { + uint64_t word = util::SafeLoad(&reinterpret_cast(bits)[i]); + if (bit_to_search == 0) { + word = ~word; + } + if (filter_input_indexes) { + bits_filter_indexes_helper(word, input_indexes + i * 64, num_indexes, indexes); + } else { + bits_to_indexes_helper(word, i * 64, num_indexes, indexes); + } + } +#if defined(ARROW_HAVE_AVX2) + } +#endif + // Optionally process the last partial word with masking out bits outside range + if (tail) { + uint64_t word = + util::SafeLoad(&reinterpret_cast(bits)[num_bits / unroll]); + if (bit_to_search == 0) { + word = ~word; + } + word &= ~0ULL >> (64 - tail); + if (filter_input_indexes) { + bits_filter_indexes_helper(word, input_indexes + num_bits - tail, num_indexes, + indexes); + } else { + bits_to_indexes_helper(word, num_bits - tail, num_indexes, indexes); + } + } +} + +void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, int* num_indexes, + uint16_t* indexes, int bit_offset) { + bits += bit_offset / 8; + bit_offset %= 8; + if (bit_offset != 0) { + int num_indexes_head = 0; + uint64_t bits_head = + util::SafeLoad(reinterpret_cast(bits)) >> bit_offset; + int bits_in_first_byte = std::min(num_bits, 8 - bit_offset); + bits_to_indexes(bit_to_search, hardware_flags, bits_in_first_byte, + reinterpret_cast(&bits_head), &num_indexes_head, + indexes); + int num_indexes_tail = 0; + if (num_bits > bits_in_first_byte) { + bits_to_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte, + bits + 1, &num_indexes_tail, indexes + num_indexes_head); + } + *num_indexes = num_indexes_head + num_indexes_tail; + return; + } + + if (bit_to_search == 0) { + bits_to_indexes_internal<0, false>(hardware_flags, num_bits, bits, nullptr, + num_indexes, indexes); + } else { + ARROW_DCHECK(bit_to_search == 1); + bits_to_indexes_internal<1, false>(hardware_flags, num_bits, bits, nullptr, + num_indexes, indexes); + } +} 
+ +void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes, int bit_offset) { + bits += bit_offset / 8; + bit_offset %= 8; + if (bit_offset != 0) { + int num_indexes_head = 0; + uint64_t bits_head = + util::SafeLoad(reinterpret_cast(bits)) >> bit_offset; + int bits_in_first_byte = std::min(num_bits, 8 - bit_offset); + bits_filter_indexes(bit_to_search, hardware_flags, bits_in_first_byte, + reinterpret_cast(&bits_head), input_indexes, + &num_indexes_head, indexes); + int num_indexes_tail = 0; + if (num_bits > bits_in_first_byte) { + bits_filter_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte, + bits + 1, input_indexes + bits_in_first_byte, &num_indexes_tail, + indexes + num_indexes_head); + } + *num_indexes = num_indexes_head + num_indexes_tail; + return; + } + + if (bit_to_search == 0) { + bits_to_indexes_internal<0, true>(hardware_flags, num_bits, bits, input_indexes, + num_indexes, indexes); + } else { + ARROW_DCHECK(bit_to_search == 1); + bits_to_indexes_internal<1, true>(hardware_flags, num_bits, bits, input_indexes, + num_indexes, indexes); + } +} + +void BitUtil::bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, uint16_t* indexes_bit1, + int bit_offset) { + bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0, + bit_offset); + int num_indexes_bit1; + bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1, + bit_offset); +} + +void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, int bit_offset) { + bits += bit_offset / 8; + bit_offset %= 8; + if (bit_offset != 0) { + uint64_t bits_head = + util::SafeLoad(reinterpret_cast(bits)) >> bit_offset; + int bits_in_first_byte = std::min(num_bits, 8 - bit_offset); + 
bits_to_bytes(hardware_flags, bits_in_first_byte, + reinterpret_cast(&bits_head), bytes); + if (num_bits > bits_in_first_byte) { + bits_to_bytes(hardware_flags, num_bits - bits_in_first_byte, bits + 1, + bytes + bits_in_first_byte); + } + return; + } + + int num_processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + // The function call below processes whole 32 bit chunks together. + num_processed = num_bits - (num_bits % 32); + bits_to_bytes_avx2(num_processed, bits, bytes); + } +#endif + // Processing 8 bits at a time + constexpr int unroll = 8; + for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) { + uint8_t bits_next = bits[i]; + // Clear the lowest bit and then make 8 copies of remaining 7 bits, each 7 bits apart + // from the previous. + uint64_t unpacked = static_cast(bits_next & 0xfe) * + ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) | + (1ULL << 35) | (1ULL << 42) | (1ULL << 49)); + unpacked |= (bits_next & 1); + unpacked &= 0x0101010101010101ULL; + unpacked *= 255; + util::SafeStore(&reinterpret_cast(bytes)[i], unpacked); + } +} + +void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, int bit_offset) { + bits += bit_offset / 8; + bit_offset %= 8; + if (bit_offset != 0) { + uint64_t bits_head; + int bits_in_first_byte = std::min(num_bits, 8 - bit_offset); + bytes_to_bits(hardware_flags, bits_in_first_byte, bytes, + reinterpret_cast(&bits_head)); + uint8_t mask = (1 << bit_offset) - 1; + *bits = static_cast((*bits & mask) | (bits_head << bit_offset)); + + if (num_bits > bits_in_first_byte) { + bytes_to_bits(hardware_flags, num_bits - bits_in_first_byte, + bytes + bits_in_first_byte, bits + 1); + } + return; + } + + int num_processed = 0; +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + // The function call below processes whole 32 bit chunks together. 
+ num_processed = num_bits - (num_bits % 32); + bytes_to_bits_avx2(num_processed, bytes, bits); + } +#endif + // Process 8 bits at a time + constexpr int unroll = 8; + for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) { + uint64_t bytes_next = util::SafeLoad(&reinterpret_cast(bytes)[i]); + bytes_next &= 0x0101010101010101ULL; + bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes + bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes + bytes_next |= (bytes_next >> 28); // All 8 output bits in the lowest byte + bits[i] = static_cast(bytes_next & 0xff); + } +} + +bool BitUtil::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes) { +#if defined(ARROW_HAVE_AVX2) + if (hardware_flags & arrow::internal::CpuInfo::AVX2) { + return are_all_bytes_zero_avx2(bytes, num_bytes); + } +#endif + uint64_t result_or = 0; + uint32_t i; + for (i = 0; i < num_bytes / 8; ++i) { + uint64_t x = util::SafeLoad(&reinterpret_cast(bytes)[i]); + result_or |= x; + } + if (num_bytes % 8 > 0) { + uint64_t tail = 0; + result_or |= memcmp(bytes + i * 8, &tail, num_bytes % 8); + } + return result_or == 0; +} + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h new file mode 100644 index 00000000000..d8248ceacab --- /dev/null +++ b/cpp/src/arrow/compute/exec/util.h @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/cpu_info.h" +#include "arrow/util/logging.h" + +#if defined(__clang__) || defined(__GNUC__) +#define BYTESWAP(x) __builtin_bswap64(x) +#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#elif defined(_MSC_VER) +#include +#define BYTESWAP(x) _byteswap_uint64(x) +#define ROTL(x, n) _rotl((x), (n)) +#endif + +namespace arrow { +namespace util { + +// Some platforms typedef int64_t as long int instead of long long int, +// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64 intrinsics +// which need long long. +// We use the cast to the type below in these intrinsics to make the code +// compile in all cases. +// +using int64_for_gather_t = const long long int; // NOLINT runtime-int + +/// Storage used to allocate temporary vectors of a batch size. +/// Temporary vectors should resemble allocating temporary variables on the stack +/// but in the context of vectorized processing where we need to store a vector of +/// temporaries instead of a single value. 
+class TempVectorStack { + template + friend class TempVectorHolder; + + public: + Status Init(MemoryPool* pool, int64_t size) { + num_vectors_ = 0; + top_ = 0; + buffer_size_ = size; + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); + buffer_ = std::move(buffer); + return Status::OK(); + } + + private: + int64_t PaddedAllocationSize(int64_t num_bytes) { + // Round up allocation size to multiple of 8 bytes + // to avoid returning temp vectors with unaligned address. + // + // Also add padding at the end to facilitate loads and stores + // using SIMD when number of vector elements is not divisible + // by the number of SIMD lanes. + // + return ::arrow::BitUtil::RoundUp(num_bytes, sizeof(int64_t)) + padding; + } + void alloc(uint32_t num_bytes, uint8_t** data, int* id) { + int64_t old_top = top_; + top_ += PaddedAllocationSize(num_bytes); + // Stack overflow check + ARROW_DCHECK(top_ <= buffer_size_); + *data = buffer_->mutable_data() + old_top; + *id = num_vectors_++; + } + void release(int id, uint32_t num_bytes) { + ARROW_DCHECK(num_vectors_ == id + 1); + int64_t size = PaddedAllocationSize(num_bytes); + ARROW_DCHECK(top_ >= size); + top_ -= size; + --num_vectors_; + } + static constexpr int64_t padding = 64; + int num_vectors_; + int64_t top_; + std::unique_ptr buffer_; + int64_t buffer_size_; +}; + +template +class TempVectorHolder { + friend class TempVectorStack; + + public: + ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); } + T* mutable_data() { return reinterpret_cast(data_); } + TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) { + stack_ = stack; + num_elements_ = num_elements; + stack_->alloc(num_elements * sizeof(T), &data_, &id_); + } + + private: + TempVectorStack* stack_; + uint8_t* data_; + int id_; + uint32_t num_elements_; +}; + +class BitUtil { + public: + static void bits_to_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, int* 
num_indexes, + uint16_t* indexes, int bit_offset = 0); + + static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes, int bit_offset = 0); + + // Input and output indexes may be pointing to the same data (in-place filtering). + static void bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, uint16_t* indexes_bit1, + int bit_offset = 0); + + // Bit 1 is replaced with byte 0xFF. + static void bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); + + // Return highest bit of each byte. + static void bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); + + static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes); + + private: + inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index, + int* num_indexes, uint16_t* indexes); + inline static void bits_filter_indexes_helper(uint64_t word, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); + template + static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); + +#if defined(ARROW_HAVE_AVX2) + static void bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes); + static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); + template + static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes); + template + static void bits_filter_indexes_imp_avx2(const int 
num_bits, const uint8_t* bits, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); + static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes); + static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits); + static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +#endif +}; + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/util_avx2.cc b/cpp/src/arrow/compute/exec/util_avx2.cc new file mode 100644 index 00000000000..8cf0104db46 --- /dev/null +++ b/cpp/src/arrow/compute/exec/util_avx2.cc @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/compute/exec/util.h" +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace util { + +#if defined(ARROW_HAVE_AVX2) + +void BitUtil::bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes) { + if (bit_to_search == 0) { + bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes); + } else { + ARROW_DCHECK(bit_to_search == 1); + bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes); + } +} + +template +void BitUtil::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes) { + // 64 bits at a time + constexpr int unroll = 64; + + // The caller takes care of processing the remaining bits at the end outside of the + // multiples of 64 + ARROW_DCHECK(num_bits % unroll == 0); + + constexpr uint64_t kEachByteIs1 = 0X0101010101010101ULL; + constexpr uint64_t kEachByteIs8 = 0x0808080808080808ULL; + constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL; + + uint8_t byte_indexes[64]; + const uint64_t incr = kEachByteIs8; + const uint64_t mask = kByteSequence0To7; + *num_indexes = 0; + for (int i = 0; i < num_bits / unroll; ++i) { + uint64_t word = reinterpret_cast(bits)[i]; + if (bit_to_search == 0) { + word = ~word; + } + uint64_t base = 0; + int num_indexes_loop = 0; + while (word) { + uint64_t byte_indexes_next = + _pext_u64(mask, _pdep_u64(word, kEachByteIs1) * 0xff) + base; + *reinterpret_cast(byte_indexes + num_indexes_loop) = byte_indexes_next; + base += incr; + num_indexes_loop += static_cast(arrow::BitUtil::PopCount(word & 0xff)); + word >>= 8; + } + // Unpack indexes to 16-bits and either add the base of i * 64 or shuffle input + // indexes + for (int j = 0; j < (num_indexes_loop + 15) / 16; ++j) { + __m256i output = _mm256_cvtepi8_epi16( + _mm_loadu_si128(reinterpret_cast(byte_indexes) + j)); + output = _mm256_add_epi16(output, _mm256_set1_epi16(i * 64)); + 
_mm256_storeu_si256(((__m256i*)(indexes + *num_indexes)) + j, output); + } + *num_indexes += num_indexes_loop; + } +} + +void BitUtil::bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes) { + if (bit_to_search == 0) { + bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes); + } else { + bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes); + } +} + +template +void BitUtil::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, + int* out_num_indexes, uint16_t* indexes) { + // 64 bits at a time + constexpr int unroll = 64; + + // The caller takes care of processing the remaining bits at the end outside of the + // multiples of 64 + ARROW_DCHECK(num_bits % unroll == 0); + + constexpr uint64_t kRepeatedBitPattern0001 = 0x1111111111111111ULL; + constexpr uint64_t k4BitSequence0To15 = 0xfedcba9876543210ULL; + constexpr uint64_t kByteSequence_0_0_1_1_2_2_3_3 = 0x0303020201010000ULL; + constexpr uint64_t kByteSequence_4_4_5_5_6_6_7_7 = 0x0707060605050404ULL; + constexpr uint64_t kByteSequence_0_2_4_6_8_10_12_14 = 0x0e0c0a0806040200ULL; + constexpr uint64_t kByteSequence_1_3_5_7_9_11_13_15 = 0x0f0d0b0907050301ULL; + constexpr uint64_t kByteSequence_0_8_1_9_2_10_3_11 = 0x0b030a0209010800ULL; + constexpr uint64_t kByteSequence_4_12_5_13_6_14_7_15 = 0x0f070e060d050c04ULL; + + const uint64_t mask = k4BitSequence0To15; + int num_indexes = 0; + for (int i = 0; i < num_bits / unroll; ++i) { + uint64_t word = reinterpret_cast(bits)[i]; + if (bit_to_search == 0) { + word = ~word; + } + + int loop_id = 0; + while (word) { + uint64_t indexes_4bit = + _pext_u64(mask, _pdep_u64(word, kRepeatedBitPattern0001) * 0xf); + // Unpack 4 bit indexes to 8 bits + __m256i indexes_8bit = _mm256_set1_epi64x(indexes_4bit); + indexes_8bit = _mm256_shuffle_epi8( + indexes_8bit, + 
_mm256_setr_epi64x(kByteSequence_0_0_1_1_2_2_3_3, kByteSequence_4_4_5_5_6_6_7_7, + kByteSequence_0_0_1_1_2_2_3_3, + kByteSequence_4_4_5_5_6_6_7_7)); + indexes_8bit = _mm256_blendv_epi8( + _mm256_and_si256(indexes_8bit, _mm256_set1_epi8(0x0f)), + _mm256_and_si256(_mm256_srli_epi32(indexes_8bit, 4), _mm256_set1_epi8(0x0f)), + _mm256_set1_epi16(static_cast(0xff00))); + __m256i input = + _mm256_loadu_si256(((const __m256i*)input_indexes) + 4 * i + loop_id); + // Shuffle bytes to get low bytes in the first 128-bit lane and high bytes in the + // second + input = _mm256_shuffle_epi8( + input, _mm256_setr_epi64x( + kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15, + kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15)); + input = _mm256_permute4x64_epi64(input, 0xd8); // 0b11011000 + // Apply permutation + __m256i output = _mm256_shuffle_epi8(input, indexes_8bit); + // Move low and high bytes across 128-bit lanes to assemble back 16-bit indexes. + // (This is the reverse of the byte permutation we did on the input) + output = _mm256_permute4x64_epi64(output, + 0xd8); // The reverse of swapping 2nd and 3rd + // 64-bit element is the same permutation + output = _mm256_shuffle_epi8(output, + _mm256_setr_epi64x(kByteSequence_0_8_1_9_2_10_3_11, + kByteSequence_4_12_5_13_6_14_7_15, + kByteSequence_0_8_1_9_2_10_3_11, + kByteSequence_4_12_5_13_6_14_7_15)); + _mm256_storeu_si256((__m256i*)(indexes + num_indexes), output); + num_indexes += static_cast(arrow::BitUtil::PopCount(word & 0xffff)); + word >>= 16; + ++loop_id; + } + } + + *out_num_indexes = num_indexes; +} + +void BitUtil::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, + uint8_t* bytes) { + constexpr int unroll = 32; + + constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL; + constexpr uint64_t kEachByteIs2 = 0x0202020202020202ULL; + constexpr uint64_t kEachByteIs3 = 0x0303030303030303ULL; + constexpr uint64_t kByteSequencePowersOf2 = 0x8040201008040201ULL; + + // 
Processing 32 bits at a time + for (int i = 0; i < num_bits / unroll; ++i) { + __m256i unpacked = _mm256_set1_epi32(reinterpret_cast(bits)[i]); + unpacked = _mm256_shuffle_epi8( + unpacked, _mm256_setr_epi64x(0ULL, kEachByteIs1, kEachByteIs2, kEachByteIs3)); + __m256i bits_in_bytes = _mm256_set1_epi64x(kByteSequencePowersOf2); + unpacked = + _mm256_cmpeq_epi8(bits_in_bytes, _mm256_and_si256(unpacked, bits_in_bytes)); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(bytes) + i, unpacked); + } +} + +void BitUtil::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, + uint8_t* bits) { + constexpr int unroll = 32; + // Processing 32 bits at a time + for (int i = 0; i < num_bits / unroll; ++i) { + reinterpret_cast(bits)[i] = _mm256_movemask_epi8( + _mm256_loadu_si256(reinterpret_cast(bytes) + i)); + } +} + +bool BitUtil::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) { + __m256i result_or = _mm256_setzero_si256(); + uint32_t i; + for (i = 0; i < num_bytes / 32; ++i) { + __m256i x = _mm256_loadu_si256(reinterpret_cast(bytes) + i); + result_or = _mm256_or_si256(result_or, x); + } + uint32_t result_or32 = _mm256_movemask_epi8(result_or); + if (num_bytes % 32 > 0) { + uint64_t tail[4] = {0, 0, 0, 0}; + result_or32 |= memcmp(bytes + i * 32, tail, num_bytes % 32); + } + return result_or32 == 0; +} + +#endif // ARROW_HAVE_AVX2 + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index e9bd57596b5..2c145dadaeb 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -31,6 +31,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" #include "arrow/compute/function.h" +#include "arrow/compute/function_internal.h" #include "arrow/compute/kernel.h" #include "arrow/compute/registry.h" #include "arrow/memory_pool.h" @@ -50,6 +51,10 @@ using internal::checked_cast; namespace compute { namespace detail { +using 
::arrow::internal::BitmapEquals; +using ::arrow::internal::CopyBitmap; +using ::arrow::internal::CountSetBits; + TEST(ExecContext, BasicWorkings) { { ExecContext ctx; @@ -58,13 +63,13 @@ TEST(ExecContext, BasicWorkings) { ASSERT_EQ(std::numeric_limits::max(), ctx.exec_chunksize()); ASSERT_TRUE(ctx.use_threads()); - ASSERT_EQ(internal::CpuInfo::GetInstance(), ctx.cpu_info()); + ASSERT_EQ(arrow::internal::CpuInfo::GetInstance(), ctx.cpu_info()); } // Now, let's customize all the things LoggingMemoryPool my_pool(default_memory_pool()); std::unique_ptr custom_reg = FunctionRegistry::Make(); - ExecContext ctx(&my_pool, custom_reg.get()); + ExecContext ctx(&my_pool, /*executor=*/nullptr, custom_reg.get()); ASSERT_EQ(custom_reg.get(), ctx.func_registry()); ASSERT_EQ(&my_pool, ctx.memory_pool()); @@ -277,9 +282,9 @@ TEST_F(TestPropagateNulls, SingleValueWithNulls) { ASSERT_EQ(arr->Slice(offset)->null_count(), output.GetNullCount()); - ASSERT_TRUE(internal::BitmapEquals(output.buffers[0]->data(), output.offset, - sliced->null_bitmap_data(), sliced->offset(), - output.length)); + ASSERT_TRUE(BitmapEquals(output.buffers[0]->data(), output.offset, + sliced->null_bitmap_data(), sliced->offset(), + output.length)); AssertValidityZeroExtraBits(output); }; @@ -372,8 +377,8 @@ TEST_F(TestPropagateNulls, IntersectsNulls) { const auto& out_buffer = *output.buffers[0]; - ASSERT_TRUE(internal::BitmapEquals(out_buffer.data(), output_offset, ex_bitmap, - /*ex_offset=*/0, length)); + ASSERT_TRUE(BitmapEquals(out_buffer.data(), output_offset, ex_bitmap, + /*ex_offset=*/0, length)); // Now check that the rest of the bits in out_buffer are still 0 AssertValidityZeroExtraBits(output); @@ -537,7 +542,7 @@ TEST_F(TestExecBatchIterator, ZeroLengthInputs) { // ---------------------------------------------------------------------- // Scalar function execution -void ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) { +Status ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) { 
DCHECK_EQ(1, batch.num_values()); const auto& type = checked_cast(*batch[0].type()); int value_size = type.bit_width() / 8; @@ -547,27 +552,27 @@ void ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) { uint8_t* dst = out_arr->buffers[1]->mutable_data() + out_arr->offset * value_size; const uint8_t* src = arg0.buffers[1]->data() + arg0.offset * value_size; std::memcpy(dst, src, batch.length * value_size); + return Status::OK(); } -void ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out) { +Status ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // Propagate nulls not used. Check that the out bitmap isn't the same already // as the input bitmap const ArrayData& arg0 = *batch[0].array(); ArrayData* out_arr = out->mutable_array(); - if (internal::CountSetBits(arg0.buffers[0]->data(), arg0.offset, batch.length) > 0) { + if (CountSetBits(arg0.buffers[0]->data(), arg0.offset, batch.length) > 0) { // Check that the bitmap has not been already copied over - DCHECK(!internal::BitmapEquals(arg0.buffers[0]->data(), arg0.offset, - out_arr->buffers[0]->data(), out_arr->offset, - batch.length)); + DCHECK(!BitmapEquals(arg0.buffers[0]->data(), arg0.offset, + out_arr->buffers[0]->data(), out_arr->offset, batch.length)); } - internal::CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length, - out_arr->buffers[0]->mutable_data(), out_arr->offset); - ExecCopy(ctx, batch, out); + CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length, + out_arr->buffers[0]->mutable_data(), out_arr->offset); + return ExecCopy(ctx, batch, out); } -void ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, Datum* out) { +Status ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // Validity preallocated, but not the data ArrayData* out_arr = out->mutable_array(); DCHECK_EQ(0, out_arr->offset); @@ -575,26 +580,44 @@ void ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, 
Datum* o int value_size = type.bit_width() / 8; Status s = (ctx->Allocate(out_arr->length * value_size).Value(&out_arr->buffers[1])); DCHECK_OK(s); - ExecCopy(ctx, batch, out); + return ExecCopy(ctx, batch, out); } -void ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch, Datum* out) { +Status ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch, + Datum* out) { // Neither validity nor data preallocated ArrayData* out_arr = out->mutable_array(); DCHECK_EQ(0, out_arr->offset); Status s = (ctx->AllocateBitmap(out_arr->length).Value(&out_arr->buffers[0])); DCHECK_OK(s); const ArrayData& arg0 = *batch[0].array(); - internal::CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length, - out_arr->buffers[0]->mutable_data(), /*offset=*/0); + CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length, + out_arr->buffers[0]->mutable_data(), /*offset=*/0); // Reuse the kernel that allocates the data - ExecNoPreallocatedData(ctx, batch, out); + return ExecNoPreallocatedData(ctx, batch, out); } -struct ExampleOptions : public FunctionOptions { +class ExampleOptionsType : public FunctionOptionsType { + public: + static const FunctionOptionsType* GetInstance() { + static std::unique_ptr instance(new ExampleOptionsType()); + return instance.get(); + } + const char* type_name() const override { return "example"; } + std::string Stringify(const FunctionOptions& options) const override { + return type_name(); + } + bool Compare(const FunctionOptions& options, + const FunctionOptions& other) const override { + return true; + } +}; +class ExampleOptions : public FunctionOptions { + public: + explicit ExampleOptions(std::shared_ptr value) + : FunctionOptions(ExampleOptionsType::GetInstance()), value(std::move(value)) {} std::shared_ptr value; - explicit ExampleOptions(std::shared_ptr value) : value(std::move(value)) {} }; struct ExampleState : public KernelState { @@ -602,12 +625,13 @@ struct ExampleState : public KernelState { explicit 
ExampleState(std::shared_ptr value) : value(std::move(value)) {} }; -std::unique_ptr InitStateful(KernelContext*, const KernelInitArgs& args) { +Result> InitStateful(KernelContext*, + const KernelInitArgs& args) { auto func_options = static_cast(args.options); return std::unique_ptr(new ExampleState{func_options->value}); } -void ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) { +Status ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // We take the value from the state and multiply the data in batch[0] with it ExampleState* state = static_cast(ctx->state()); int32_t multiplier = checked_cast(*state->value).value; @@ -619,12 +643,14 @@ void ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) { for (int64_t i = 0; i < arg0.length; ++i) { dst[i] = arg0_data[i] * multiplier; } + return Status::OK(); } -void ExecAddInt32(KernelContext* ctx, const ExecBatch& batch, Datum* out) { +Status ExecAddInt32(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const Int32Scalar& arg0 = batch[0].scalar_as(); const Int32Scalar& arg1 = batch[1].scalar_as(); out->value = std::make_shared(arg0.value + arg1.value); + return Status::OK(); } class TestCallScalarFunction : public TestComputeInternals { diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index c8fc8b8dec0..05d14d03b16 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -21,10 +21,13 @@ #include #include +#include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" +#include "arrow/compute/function_internal.h" #include "arrow/compute/kernels/common.h" +#include "arrow/compute/registry.h" #include "arrow/datum.h" #include "arrow/util/cpu_info.h" @@ -33,6 +36,38 @@ namespace arrow { using internal::checked_cast; namespace compute { +Result> FunctionOptionsType::Serialize( + const FunctionOptions&) const { + 
return Status::NotImplemented("Serialize for ", type_name()); +} + +Result> FunctionOptionsType::Deserialize( + const Buffer& buffer) const { + return Status::NotImplemented("Deserialize for ", type_name()); +} + +std::string FunctionOptions::ToString() const { return options_type()->Stringify(*this); } + +bool FunctionOptions::Equals(const FunctionOptions& other) const { + if (this == &other) return true; + if (options_type() != other.options_type()) return false; + return options_type()->Compare(*this, other); +} + +Result> FunctionOptions::Serialize() const { + return options_type()->Serialize(*this); +} + +Result> FunctionOptions::Deserialize( + const std::string& type_name, const Buffer& buffer) { + ARROW_ASSIGN_OR_RAISE(auto options, + GetFunctionRegistry()->GetFunctionOptionsType(type_name)); + return options->Deserialize(buffer); +} + +void PrintTo(const FunctionOptions& options, std::ostream* os) { + *os << options.ToString(); +} static const FunctionDoc kEmptyFunctionDoc{}; @@ -179,8 +214,7 @@ Result Function::Execute(const std::vector& args, KernelContext kernel_ctx{ctx}; if (kernel->init) { - state = kernel->init(&kernel_ctx, {kernel, inputs, options}); - RETURN_NOT_OK(kernel_ctx.status()); + ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options})); kernel_ctx.SetState(state.get()); } @@ -211,8 +245,9 @@ Status Function::Validate() const { if (arity_.is_varargs && arg_count == arity_.num_args + 1) { return Status::OK(); } - return Status::Invalid("In function '", name_, - "': ", "number of argument names != function arity"); + return Status::Invalid( + "In function '", name_, + "': ", "number of argument names for function documentation != function arity"); } return Status::OK(); } diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index 9a3e1c1852f..bd854bbb28e 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -29,6 +29,7 @@ #include "arrow/datum.h" #include 
"arrow/result.h" #include "arrow/status.h" +#include "arrow/util/compare.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -39,12 +40,50 @@ namespace compute { /// /// @{ +/// \brief Extension point for defining options outside libarrow (but +/// still within this project). +class ARROW_EXPORT FunctionOptionsType { + public: + virtual ~FunctionOptionsType() = default; + + virtual const char* type_name() const = 0; + virtual std::string Stringify(const FunctionOptions&) const = 0; + virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; + virtual Result> Serialize(const FunctionOptions&) const; + virtual Result> Deserialize( + const Buffer& buffer) const; +}; + /// \brief Base class for specifying options configuring a function's behavior, /// such as error handling. -struct ARROW_EXPORT FunctionOptions { +class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { + public: virtual ~FunctionOptions() = default; + + const FunctionOptionsType* options_type() const { return options_type_; } + const char* type_name() const { return options_type()->type_name(); } + + bool Equals(const FunctionOptions& other) const; + using util::EqualityComparable::Equals; + using util::EqualityComparable::operator==; + using util::EqualityComparable::operator!=; + std::string ToString() const; + /// \brief Serialize an options struct to a buffer. + Result> Serialize() const; + /// \brief Deserialize an options struct from a buffer. + /// Note: this will only look for `type_name` in the default FunctionRegistry; + /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then + /// call FunctionOptionsType::Deserialize(). 
+ static Result> Deserialize( + const std::string& type_name, const Buffer& buffer); + + protected: + explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} + const FunctionOptionsType* options_type_; }; +ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); + /// \brief Contains the number of required arguments for the function. /// /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity. diff --git a/cpp/src/arrow/compute/function_benchmark.cc b/cpp/src/arrow/compute/function_benchmark.cc index 5dc305bdd89..a29a766be79 100644 --- a/cpp/src/arrow/compute/function_benchmark.cc +++ b/cpp/src/arrow/compute/function_benchmark.cc @@ -19,6 +19,7 @@ #include "arrow/array/array_base.h" #include "arrow/compute/api.h" +#include "arrow/compute/exec_internal.h" #include "arrow/memory_pool.h" #include "arrow/scalar.h" #include "arrow/testing/gtest_util.h" @@ -78,16 +79,17 @@ void BM_CastDispatchBaseline(benchmark::State& state) { ExecContext exec_context; KernelContext kernel_context(&exec_context); - auto cast_state = - cast_kernel->init(&kernel_context, {cast_kernel, {double_type}, &cast_options}); - ABORT_NOT_OK(kernel_context.status()); + auto cast_state = cast_kernel + ->init(&kernel_context, + KernelInitArgs{cast_kernel, {double_type}, &cast_options}) + .ValueOrDie(); kernel_context.SetState(cast_state.get()); for (auto _ : state) { Datum timestamp_scalar = MakeNullScalar(double_type); for (Datum int_scalar : int_scalars) { - exec(&kernel_context, {{std::move(int_scalar)}, 1}, ×tamp_scalar); - ABORT_NOT_OK(kernel_context.status()); + ABORT_NOT_OK( + exec(&kernel_context, {{std::move(int_scalar)}, 1}, ×tamp_scalar)); } benchmark::DoNotOptimize(timestamp_scalar); } @@ -164,8 +166,7 @@ void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) { int64_t total = 0; for (const auto& scalar : scalars) { Datum result{MakeNullScalar(int64())}; - exec(&kernel_context, ExecBatch{{scalar}, /*length=*/1}, &result); - 
ABORT_NOT_OK(kernel_context.status()); + ABORT_NOT_OK(exec(&kernel_context, ExecBatch{{scalar}, /*length=*/1}, &result)); total += result.scalar()->is_valid; } benchmark::DoNotOptimize(total); @@ -174,11 +175,44 @@ void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) { state.SetItemsProcessed(state.iterations() * N); } +void BM_ExecBatchIterator(benchmark::State& state) { + // Measure overhead related to splitting ExecBatch into smaller ExecBatches + // for parallelism or more optimal CPU cache affinity + random::RandomArrayGenerator rag(kSeed); + + const int64_t length = 1 << 20; + const int num_fields = 32; + + std::vector args(num_fields); + for (int i = 0; i < num_fields; ++i) { + args[i] = rag.Int64(length, 0, 100)->data(); + } + + const int64_t blocksize = state.range(0); + for (auto _ : state) { + std::unique_ptr it = + *detail::ExecBatchIterator::Make(args, blocksize); + ExecBatch batch; + while (it->Next(&batch)) { + for (int i = 0; i < num_fields; ++i) { + auto data = batch.values[i].array()->buffers[1]->data(); + benchmark::DoNotOptimize(data); + } + } + benchmark::DoNotOptimize(batch); + } + // Provides comparability across blocksizes by looking at the iterations per + // second. So 1000 iterations/second means that input splitting associated + // with ExecBatchIterator takes up 1ms every time. 
+ state.SetItemsProcessed(state.iterations()); +} + BENCHMARK(BM_CastDispatch); BENCHMARK(BM_CastDispatchBaseline); BENCHMARK(BM_AddDispatch); BENCHMARK(BM_ExecuteScalarFunctionOnScalar); BENCHMARK(BM_ExecuteScalarKernelOnScalar); +BENCHMARK(BM_ExecBatchIterator)->RangeMultiplier(4)->Range(1024, 64 * 1024); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/function_internal.cc b/cpp/src/arrow/compute/function_internal.cc new file mode 100644 index 00000000000..0a926e0a39c --- /dev/null +++ b/cpp/src/arrow/compute/function_internal.cc @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/function_internal.h" + +#include "arrow/array/util.h" +#include "arrow/compute/function.h" +#include "arrow/compute/registry.h" +#include "arrow/io/memory.h" +#include "arrow/ipc/reader.h" +#include "arrow/ipc/writer.h" +#include "arrow/record_batch.h" +#include "arrow/scalar.h" +#include "arrow/util/checked_cast.h" + +namespace arrow { +namespace compute { +namespace internal { +using ::arrow::internal::checked_cast; + +constexpr char kTypeNameField[] = "_type_name"; + +Result> FunctionOptionsToStructScalar( + const FunctionOptions& options) { + std::vector field_names; + std::vector> values; + const auto* options_type = + dynamic_cast(options.options_type()); + if (!options_type) { + return Status::NotImplemented("serializing ", options.type_name(), + " to StructScalar"); + } + RETURN_NOT_OK(options_type->ToStructScalar(options, &field_names, &values)); + field_names.push_back(kTypeNameField); + const char* options_name = options.type_name(); + values.emplace_back( + new BinaryScalar(Buffer::Wrap(options_name, std::strlen(options_name)))); + return StructScalar::Make(std::move(values), std::move(field_names)); +} + +Result> FunctionOptionsFromStructScalar( + const StructScalar& scalar) { + ARROW_ASSIGN_OR_RAISE(auto type_name_holder, scalar.field(kTypeNameField)); + const std::string type_name = + checked_cast(*type_name_holder).value->ToString(); + ARROW_ASSIGN_OR_RAISE(auto raw_options_type, + GetFunctionRegistry()->GetFunctionOptionsType(type_name)); + const auto* options_type = checked_cast(raw_options_type); + return options_type->FromStructScalar(scalar); +} + +Result> GenericOptionsType::Serialize( + const FunctionOptions& options) const { + ARROW_ASSIGN_OR_RAISE(auto scalar, FunctionOptionsToStructScalar(options)); + ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*scalar, 1)); + auto batch = + RecordBatch::Make(schema({field("", array->type())}), /*num_rows=*/1, {array}); + ARROW_ASSIGN_OR_RAISE(auto stream, 
io::BufferOutputStream::Create()); + ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema())); + RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); + RETURN_NOT_OK(writer->Close()); + return stream->Finish(); +} + +Result> GenericOptionsType::Deserialize( + const Buffer& buffer) const { + return DeserializeFunctionOptions(buffer); +} + +Result> DeserializeFunctionOptions( + const Buffer& buffer) { + io::BufferReader stream(buffer); + ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream)); + ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0)); + if (batch->num_rows() != 1) { + return Status::Invalid( + "serialized FunctionOptions's batch repr was not a single row - had ", + batch->num_rows()); + } + if (batch->num_columns() != 1) { + return Status::Invalid( + "serialized FunctionOptions's batch repr was not a single column - had ", + batch->num_columns()); + } + auto column = batch->column(0); + if (column->type()->id() != Type::STRUCT) { + return Status::Invalid( + "serialized FunctionOptions's batch repr was not a struct column - was ", + column->type()->ToString()); + } + ARROW_ASSIGN_OR_RAISE(auto raw_scalar, + checked_cast(*column).GetScalar(0)); + auto scalar = checked_cast(*raw_scalar); + return FunctionOptionsFromStructScalar(scalar); +} + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h new file mode 100644 index 00000000000..fdd7f09ba1f --- /dev/null +++ b/cpp/src/arrow/compute/function_internal.h @@ -0,0 +1,626 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_nested.h" +#include "arrow/compute/api_vector.h" +#include "arrow/compute/function.h" +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/key_value_metadata.h" +#include "arrow/util/reflection_internal.h" +#include "arrow/util/string.h" +#include "arrow/util/visibility.h" + +namespace arrow { +struct Scalar; +struct StructScalar; +using ::arrow::internal::checked_cast; + +namespace internal { +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "SortOrder"; } + static std::string value_name(compute::SortOrder value) { + switch (value) { + case compute::SortOrder::Ascending: + return "Ascending"; + case compute::SortOrder::Descending: + return "Descending"; + } + return ""; + } +}; +} // namespace internal + +namespace compute { +namespace internal { + +using arrow::internal::EnumTraits; +using arrow::internal::has_enum_traits; + +template ::type> +Result ValidateEnumValue(CType raw) { + for (auto valid : EnumTraits::values()) { + if (raw == static_cast(valid)) { + return static_cast(raw); + } + } + return Status::Invalid("Invalid value for ", EnumTraits::name(), ": ", raw); +} + +class 
GenericOptionsType : public FunctionOptionsType { + public: + Result> Serialize(const FunctionOptions&) const override; + Result> Deserialize( + const Buffer& buffer) const override; + virtual Status ToStructScalar(const FunctionOptions& options, + std::vector* field_names, + std::vector>* values) const = 0; + virtual Result> FromStructScalar( + const StructScalar& scalar) const = 0; +}; + +ARROW_EXPORT +Result> FunctionOptionsToStructScalar( + const FunctionOptions&); +ARROW_EXPORT +Result> FunctionOptionsFromStructScalar( + const StructScalar&); +ARROW_EXPORT +Result> DeserializeFunctionOptions(const Buffer& buffer); + +template +static inline enable_if_t::value, std::string> GenericToString( + const T& value) { + std::stringstream ss; + ss << value; + return ss.str(); +} + +static inline std::string GenericToString(bool value) { return value ? "true" : "false"; } + +static inline std::string GenericToString(const std::string& value) { + std::stringstream ss; + ss << '"' << value << '"'; + return ss.str(); +} + +template +static inline enable_if_t::value, std::string> GenericToString( + const T value) { + return EnumTraits::value_name(value); +} + +template +static inline std::string GenericToString(const std::shared_ptr& value) { + std::stringstream ss; + return value ? 
value->ToString() : ""; +} + +static inline std::string GenericToString(const std::shared_ptr& value) { + std::stringstream ss; + ss << value->type->ToString() << ":" << value->ToString(); + return ss.str(); +} + +static inline std::string GenericToString( + const std::shared_ptr& value) { + std::stringstream ss; + ss << "KeyValueMetadata{"; + if (value) { + bool first = true; + for (const auto& pair : value->sorted_pairs()) { + if (!first) ss << ", "; + first = false; + ss << pair.first << ':' << pair.second; + } + } + ss << '}'; + return ss.str(); +} + +static inline std::string GenericToString(const Datum& value) { + switch (value.kind()) { + case Datum::NONE: + return ""; + case Datum::SCALAR: + return GenericToString(value.scalar()); + case Datum::ARRAY: { + std::stringstream ss; + ss << value.type()->ToString() << ':' << value.make_array()->ToString(); + return ss.str(); + } + case Datum::CHUNKED_ARRAY: + case Datum::RECORD_BATCH: + case Datum::TABLE: + case Datum::COLLECTION: + return value.ToString(); + } + return value.ToString(); +} + +template +static inline std::string GenericToString(const std::vector& value) { + std::stringstream ss; + ss << "["; + bool first = true; + // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis + for (auto it = value.begin(); it != value.end(); it++) { + if (!first) ss << ", "; + first = false; + ss << GenericToString(*it); + } + ss << ']'; + return ss.str(); +} + +static inline std::string GenericToString(SortOrder value) { + switch (value) { + case SortOrder::Ascending: + return "Ascending"; + case SortOrder::Descending: + return "Descending"; + } + return ""; +} + +static inline std::string GenericToString(const std::vector& value) { + std::stringstream ss; + ss << '['; + bool first = true; + for (const auto& key : value) { + if (!first) { + ss << ", "; + } + first = false; + ss << key.ToString(); + } + ss << ']'; + return ss.str(); +} + +template +static inline bool GenericEquals(const T& left, const T& 
right) { + return left == right; +} + +template +static inline bool GenericEquals(const std::shared_ptr& left, + const std::shared_ptr& right) { + if (left && right) { + return left->Equals(*right); + } + return left == right; +} + +static inline bool IsEmpty(const std::shared_ptr& meta) { + return !meta || meta->size() == 0; +} + +static inline bool GenericEquals(const std::shared_ptr& left, + const std::shared_ptr& right) { + // Special case since null metadata is considered equivalent to empty + if (IsEmpty(left) || IsEmpty(right)) { + return IsEmpty(left) && IsEmpty(right); + } + return left->Equals(*right); +} + +template +static inline bool GenericEquals(const std::vector& left, + const std::vector& right) { + if (left.size() != right.size()) return false; + for (size_t i = 0; i < left.size(); i++) { + if (!GenericEquals(left[i], right[i])) return false; + } + return true; +} + +template +static inline decltype(TypeTraits::ArrowType>::type_singleton()) +GenericTypeSingleton() { + return TypeTraits::ArrowType>::type_singleton(); +} + +template +static inline enable_if_same, + std::shared_ptr> +GenericTypeSingleton() { + return map(binary(), binary()); +} + +template +static inline enable_if_t::value, std::shared_ptr> +GenericTypeSingleton() { + return TypeTraits::Type>::type_singleton(); +} + +template +static inline enable_if_same> +GenericTypeSingleton() { + std::vector> fields; + fields.emplace_back(new Field("name", GenericTypeSingleton())); + fields.emplace_back(new Field("order", GenericTypeSingleton())); + return std::make_shared(std::move(fields)); +} + +// N.B. 
ordering of overloads is relatively fragile +template +static inline Result()))> GenericToScalar( + const T& value) { + return MakeScalar(value); +} + +// For Clang/libc++: when iterating through vector, we can't +// pass it by reference so the overload above doesn't apply +static inline Result> GenericToScalar(bool value) { + return MakeScalar(value); +} + +template ::value>> +static inline Result> GenericToScalar(const T value) { + using CType = typename EnumTraits::CType; + return GenericToScalar(static_cast(value)); +} + +static inline Result> GenericToScalar(const SortKey& value) { + ARROW_ASSIGN_OR_RAISE(auto name, GenericToScalar(value.name)); + ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(value.order)); + return StructScalar::Make({name, order}, {"name", "order"}); +} + +static inline Result> GenericToScalar( + const std::shared_ptr& value) { + auto ty = GenericTypeSingleton>(); + std::unique_ptr builder; + RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder)); + auto* map_builder = checked_cast(builder.get()); + auto* key_builder = checked_cast(map_builder->key_builder()); + auto* item_builder = checked_cast(map_builder->item_builder()); + RETURN_NOT_OK(map_builder->Append()); + if (value) { + RETURN_NOT_OK(key_builder->AppendValues(value->keys())); + RETURN_NOT_OK(item_builder->AppendValues(value->values())); + } + std::shared_ptr arr; + RETURN_NOT_OK(map_builder->Finish(&arr)); + return arr->GetScalar(0); +} + +template +static inline Result> GenericToScalar( + const std::vector& value) { + std::shared_ptr type = GenericTypeSingleton(); + std::vector> scalars; + scalars.reserve(value.size()); + // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis + for (auto it = value.begin(); it != value.end(); it++) { + ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it)); + scalars.push_back(std::move(scalar)); + } + std::unique_ptr builder; + RETURN_NOT_OK( + MakeBuilder(default_memory_pool(), type ? 
type : scalars[0]->type, &builder)); + RETURN_NOT_OK(builder->AppendScalars(scalars)); + std::shared_ptr out; + RETURN_NOT_OK(builder->Finish(&out)); + return std::make_shared(std::move(out)); +} + +static inline Result> GenericToScalar( + const std::shared_ptr& value) { + if (!value) { + return Status::Invalid("shared_ptr is nullptr"); + } + return MakeNullScalar(value); +} + +static inline Result> GenericToScalar( + const std::shared_ptr& value) { + return value; +} + +static inline Result> GenericToScalar( + const std::shared_ptr& value) { + return std::make_shared(value); +} + +static inline Result> GenericToScalar(const Datum& value) { + // TODO(ARROW-9434): store in a union instead. + switch (value.kind()) { + case Datum::ARRAY: + return GenericToScalar(value.make_array()); + break; + default: + return Status::NotImplemented("Cannot serialize Datum kind ", value.kind()); + } +} + +template +static inline enable_if_primitive_ctype::ArrowType, Result> +GenericFromScalar(const std::shared_ptr& value) { + using ArrowType = typename CTypeTraits::ArrowType; + using ScalarType = typename TypeTraits::ScalarType; + if (value->type->id() != ArrowType::type_id) { + return Status::Invalid("Expected type ", ArrowType::type_id, " but got ", + value->type->ToString()); + } + const auto& holder = checked_cast(*value); + if (!holder.is_valid) return Status::Invalid("Got null scalar"); + return holder.value; +} + +template +static inline enable_if_primitive_ctype::Type, Result> +GenericFromScalar(const std::shared_ptr& value) { + ARROW_ASSIGN_OR_RAISE(auto raw_val, + GenericFromScalar::CType>(value)); + return ValidateEnumValue(raw_val); +} + +template +using enable_if_same_result = enable_if_same>; + +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + if (!is_base_binary_like(value->type->id())) { + return Status::Invalid("Expected binary-like type but got ", value->type->ToString()); + } + const auto& holder = 
checked_cast(*value); + if (!holder.is_valid) return Status::Invalid("Got null scalar"); + return holder.value->ToString(); +} + +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + if (value->type->id() != Type::STRUCT) { + return Status::Invalid("Expected type STRUCT but got ", value->type->id()); + } + if (!value->is_valid) return Status::Invalid("Got null scalar"); + const auto& holder = checked_cast(*value); + ARROW_ASSIGN_OR_RAISE(auto name_holder, holder.field("name")); + ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order")); + ARROW_ASSIGN_OR_RAISE(auto name, GenericFromScalar(name_holder)); + ARROW_ASSIGN_OR_RAISE(auto order, GenericFromScalar(order_holder)); + return SortKey{std::move(name), order}; +} + +template +static inline enable_if_same_result> GenericFromScalar( + const std::shared_ptr& value) { + return value->type; +} + +template +static inline enable_if_same_result> GenericFromScalar( + const std::shared_ptr& value) { + return value; +} + +template +static inline enable_if_same_result> +GenericFromScalar(const std::shared_ptr& value) { + auto ty = GenericTypeSingleton>(); + if (!value->type->Equals(ty)) { + return Status::Invalid("Expected ", ty->ToString(), " but got ", + value->type->ToString()); + } + const auto& holder = checked_cast(*value); + std::vector keys; + std::vector values; + const auto& list = checked_cast(*holder.value); + const auto& key_arr = checked_cast(*list.field(0)); + const auto& value_arr = checked_cast(*list.field(1)); + for (int64_t i = 0; i < list.length(); i++) { + keys.push_back(key_arr.GetString(i)); + values.push_back(value_arr.GetString(i)); + } + return key_value_metadata(std::move(keys), std::move(values)); +} + +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + if (value->type->id() == Type::LIST) { + const auto& holder = checked_cast(*value); + return holder.value; + } + // TODO(ARROW-9434): handle 
other possible datum kinds by looking for a union + return Status::Invalid("Cannot deserialize Datum from ", value->ToString()); +} + +template +static enable_if_same::ArrowType, ListType, Result> +GenericFromScalar(const std::shared_ptr& value) { + using ValueType = typename T::value_type; + if (value->type->id() != Type::LIST) { + return Status::Invalid("Expected type LIST but got ", value->type->ToString()); + } + const auto& holder = checked_cast(*value); + if (!holder.is_valid) return Status::Invalid("Got null scalar"); + std::vector result; + for (int i = 0; i < holder.value->length(); i++) { + ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i)); + ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar(scalar)); + result.push_back(std::move(v)); + } + return result; +} + +template +struct StringifyImpl { + template + StringifyImpl(const Options& obj, const Tuple& props) + : obj_(obj), members_(props.size()) { + props.ForEach(*this); + } + + template + void operator()(const Property& prop, size_t i) { + std::stringstream ss; + ss << prop.name() << '=' << GenericToString(prop.get(obj_)); + members_[i] = ss.str(); + } + + std::string Finish() { + return "{" + arrow::internal::JoinStrings(members_, ", ") + "}"; + } + + const Options& obj_; + std::vector members_; +}; + +template +struct CompareImpl { + template + CompareImpl(const Options& l, const Options& r, const Tuple& props) + : left_(l), right_(r) { + props.ForEach(*this); + } + + template + void operator()(const Property& prop, size_t) { + equal_ &= GenericEquals(prop.get(left_), prop.get(right_)); + } + + const Options& left_; + const Options& right_; + bool equal_ = true; +}; + +template +struct ToStructScalarImpl { + template + ToStructScalarImpl(const Options& obj, const Tuple& props, + std::vector* field_names, + std::vector>* values) + : obj_(obj), field_names_(field_names), values_(values) { + props.ForEach(*this); + } + + template + void operator()(const Property& prop, size_t) { + if 
(!status_.ok()) return; + auto result = GenericToScalar(prop.get(obj_)); + if (!result.ok()) { + status_ = result.status().WithMessage("Could not serialize field ", prop.name(), + " of options type ", Options::kTypeName, ": ", + result.status().message()); + return; + } + field_names_->emplace_back(prop.name()); + values_->push_back(result.MoveValueUnsafe()); + } + + const Options& obj_; + Status status_; + std::vector* field_names_; + std::vector>* values_; +}; + +template +struct FromStructScalarImpl { + template + FromStructScalarImpl(Options* obj, const StructScalar& scalar, const Tuple& props) + : obj_(obj), scalar_(scalar) { + props.ForEach(*this); + } + + template + void operator()(const Property& prop, size_t) { + if (!status_.ok()) return; + auto maybe_holder = scalar_.field(std::string(prop.name())); + if (!maybe_holder.ok()) { + status_ = maybe_holder.status().WithMessage( + "Cannot deserialize field ", prop.name(), " of options type ", + Options::kTypeName, ": ", maybe_holder.status().message()); + return; + } + auto holder = maybe_holder.MoveValueUnsafe(); + auto result = GenericFromScalar(holder); + if (!result.ok()) { + status_ = result.status().WithMessage("Cannot deserialize field ", prop.name(), + " of options type ", Options::kTypeName, ": ", + result.status().message()); + return; + } + prop.set(obj_, result.MoveValueUnsafe()); + } + + Options* obj_; + Status status_; + const StructScalar& scalar_; +}; + +template +const FunctionOptionsType* GetFunctionOptionsType(const Properties&... 
properties) { + static const class OptionsType : public GenericOptionsType { + public: + explicit OptionsType(const arrow::internal::PropertyTuple properties) + : properties_(properties) {} + + const char* type_name() const override { return Options::kTypeName; } + + std::string Stringify(const FunctionOptions& options) const override { + const auto& self = checked_cast(options); + return StringifyImpl(self, properties_).Finish(); + } + bool Compare(const FunctionOptions& options, + const FunctionOptions& other) const override { + const auto& lhs = checked_cast(options); + const auto& rhs = checked_cast(other); + return CompareImpl(lhs, rhs, properties_).equal_; + } + Status ToStructScalar(const FunctionOptions& options, + std::vector* field_names, + std::vector>* values) const override { + const auto& self = checked_cast(options); + RETURN_NOT_OK( + ToStructScalarImpl(self, properties_, field_names, values).status_); + return Status::OK(); + } + Result> FromStructScalar( + const StructScalar& scalar) const override { + auto options = std::unique_ptr(new Options()); + RETURN_NOT_OK( + FromStructScalarImpl(options.get(), scalar, properties_).status_); + return std::move(options); + } + + private: + const arrow::internal::PropertyTuple properties_; + } instance(arrow::internal::MakeProperties(properties...)); + return &instance; +} + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index b6f1815b89e..7aca10ef0fa 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -21,16 +21,114 @@ #include +#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/api_scalar.h" +#include "arrow/compute/api_vector.h" +#include "arrow/compute/cast.h" #include "arrow/compute/function.h" #include "arrow/compute/kernel.h" #include "arrow/datum.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include 
"arrow/type.h" +#include "arrow/util/key_value_metadata.h" namespace arrow { namespace compute { +TEST(FunctionOptions, Equality) { + std::vector> options; + options.emplace_back(new ScalarAggregateOptions()); + options.emplace_back(new ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1)); + options.emplace_back(new ModeOptions()); + options.emplace_back(new ModeOptions(/*n=*/2)); + options.emplace_back(new VarianceOptions()); + options.emplace_back(new VarianceOptions(/*ddof=*/2)); + options.emplace_back(new QuantileOptions()); + options.emplace_back( + new QuantileOptions(/*q=*/0.75, QuantileOptions::Interpolation::MIDPOINT)); + options.emplace_back(new TDigestOptions()); + options.emplace_back( + new TDigestOptions(/*q=*/0.75, /*delta=*/50, /*buffer_size=*/1024)); + options.emplace_back(new IndexOptions(ScalarFromJSON(int64(), "16"))); + options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "true"))); + options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "null"))); + options.emplace_back(new ArithmeticOptions()); + options.emplace_back(new ArithmeticOptions(/*check_overflow=*/true)); + options.emplace_back(new ElementWiseAggregateOptions()); + options.emplace_back(new ElementWiseAggregateOptions(/*skip_nulls=*/false)); + options.emplace_back(new JoinOptions()); + options.emplace_back(new JoinOptions(JoinOptions::REPLACE, "replacement")); + options.emplace_back(new MatchSubstringOptions("pattern")); + options.emplace_back(new MatchSubstringOptions("pattern", /*ignore_case=*/true)); + options.emplace_back(new SplitOptions()); + options.emplace_back(new SplitOptions(/*max_splits=*/2, /*reverse=*/true)); + options.emplace_back(new SplitPatternOptions("pattern")); + options.emplace_back( + new SplitPatternOptions("pattern", /*max_splits=*/2, /*reverse=*/true)); + options.emplace_back(new ReplaceSubstringOptions("pattern", "replacement")); + options.emplace_back( + new ReplaceSubstringOptions("pattern", "replacement", 
/*max_replacements=*/2)); + options.emplace_back(new ReplaceSliceOptions(0, 1, "foo")); + options.emplace_back(new ReplaceSliceOptions(1, -1, "bar")); + options.emplace_back(new ExtractRegexOptions("pattern")); + options.emplace_back(new ExtractRegexOptions("pattern2")); + options.emplace_back(new SetLookupOptions(ArrayFromJSON(int64(), "[1, 2, 3, 4]"))); + options.emplace_back(new SetLookupOptions(ArrayFromJSON(boolean(), "[true, false]"))); + options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::MILLI)); + options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::NANO)); + options.emplace_back(new PadOptions(5, " ")); + options.emplace_back(new PadOptions(10, "A")); + options.emplace_back(new TrimOptions(" ")); + options.emplace_back(new TrimOptions("abc")); + options.emplace_back(new SliceOptions(/*start=*/1)); + options.emplace_back(new SliceOptions(/*start=*/1, /*stop=*/-5, /*step=*/-2)); + // N.B. we never actually use field_nullability or field_metadata in Arrow + options.emplace_back(new MakeStructOptions({"col1"}, {true}, {})); + options.emplace_back(new MakeStructOptions({"col1"}, {false}, {})); + options.emplace_back( + new MakeStructOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})})); + options.emplace_back(new DayOfWeekOptions(false, 1)); + options.emplace_back(new CastOptions(CastOptions::Safe(boolean()))); + options.emplace_back(new CastOptions(CastOptions::Unsafe(int64()))); + options.emplace_back(new FilterOptions()); + options.emplace_back( + new FilterOptions(FilterOptions::NullSelectionBehavior::EMIT_NULL)); + options.emplace_back(new TakeOptions()); + options.emplace_back(new TakeOptions(/*boundscheck=*/false)); + options.emplace_back(new DictionaryEncodeOptions()); + options.emplace_back( + new DictionaryEncodeOptions(DictionaryEncodeOptions::NullEncodingBehavior::ENCODE)); + options.emplace_back(new ArraySortOptions()); + options.emplace_back(new ArraySortOptions(SortOrder::Descending)); + 
options.emplace_back(new SortOptions()); + options.emplace_back(new SortOptions({SortKey("key", SortOrder::Ascending)})); + options.emplace_back(new SortOptions( + {SortKey("key", SortOrder::Descending), SortKey("value", SortOrder::Descending)})); + options.emplace_back(new PartitionNthOptions(/*pivot=*/0)); + options.emplace_back(new PartitionNthOptions(/*pivot=*/42)); + + for (size_t i = 0; i < options.size(); i++) { + const size_t prev_i = i == 0 ? options.size() - 1 : i - 1; + const FunctionOptions& cur = *options[i]; + const FunctionOptions& prev = *options[prev_i]; + SCOPED_TRACE(cur.type_name()); + SCOPED_TRACE(cur.ToString()); + ASSERT_EQ(cur, cur); + ASSERT_NE(cur, prev); + ASSERT_NE(prev, cur); + ASSERT_NE("", cur.ToString()); + + ASSERT_OK_AND_ASSIGN(auto serialized, cur.Serialize()); + const auto* type_name = cur.type_name(); + ASSERT_OK_AND_ASSIGN( + auto deserialized, + FunctionOptions::Deserialize(std::string(type_name, std::strlen(type_name)), + *serialized)); + ASSERT_TRUE(cur.Equals(*deserialized)); + } +} + struct ExecBatch; TEST(Arity, Basics) { @@ -87,8 +185,7 @@ TEST(VectorFunction, Basics) { } auto ExecNYI = [](KernelContext* ctx, const ExecBatch& args, Datum* out) { - ctx->SetStatus(Status::NotImplemented("NYI")); - return; + return Status::NotImplemented("NYI"); }; template @@ -181,13 +278,15 @@ TEST(ScalarAggregateFunction, Basics) { ASSERT_EQ(Function::SCALAR_AGGREGATE, func.kind()); } -std::unique_ptr NoopInit(KernelContext*, const KernelInitArgs&) { +Result> NoopInit(KernelContext*, const KernelInitArgs&) { return nullptr; } -void NoopConsume(KernelContext*, const ExecBatch&) {} -void NoopMerge(KernelContext*, const KernelState&, KernelState*) {} -void NoopFinalize(KernelContext*, Datum*) {} +Status NoopConsume(KernelContext*, const ExecBatch&) { return Status::OK(); } +Status NoopMerge(KernelContext*, const KernelState&, KernelState*) { + return Status::OK(); +} +Status NoopFinalize(KernelContext*, Datum*) { return Status::OK(); } 
TEST(ScalarAggregateFunction, DispatchExact) { ScalarAggregateFunction func("agg_test", Arity::Unary(), /*doc=*/nullptr); diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 88b42716fa2..f131f524d2e 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -59,15 +59,25 @@ Result> KernelContext::AllocateBitmap(int64_t n return result; } -void KernelContext::SetStatus(const Status& status) { - if (ARROW_PREDICT_TRUE(status.ok())) { - return; +Status Kernel::InitAll(KernelContext* ctx, const KernelInitArgs& args, + std::vector>* states) { + for (auto& state : *states) { + ARROW_ASSIGN_OR_RAISE(state, args.kernel->init(ctx, args)); } - status_ = status; + return Status::OK(); } -/// \brief Clear any error status -void KernelContext::ResetStatus() { status_ = Status::OK(); } +Result> ScalarAggregateKernel::MergeAll( + const ScalarAggregateKernel* kernel, KernelContext* ctx, + std::vector> states) { + auto out = std::move(states.back()); + states.pop_back(); + ctx->SetState(out.get()); + for (auto& state : states) { + RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get())); + } + return std::move(out); +} // ---------------------------------------------------------------------- // Some basic TypeMatcher implementations @@ -392,8 +402,7 @@ KernelSignature::KernelSignature(std::vector in_types, OutputType out out_type_(std::move(out_type)), is_varargs_(is_varargs), hash_code_(0) { - // VarArgs sigs must have only a single input type to use for argument validation - DCHECK(!is_varargs || (is_varargs && (in_types_.size() == 1))); + DCHECK(!is_varargs || (is_varargs && (in_types_.size() >= 1))); } std::shared_ptr KernelSignature::Make(std::vector in_types, @@ -420,8 +429,8 @@ bool KernelSignature::Equals(const KernelSignature& other) const { bool KernelSignature::MatchesInputs(const std::vector& args) const { if (is_varargs_) { - for (const auto& arg : args) { - if (!in_types_[0].Matches(arg)) { + for 
(size_t i = 0; i < args.size(); ++i) { + if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) { return false; } } @@ -454,15 +463,19 @@ std::string KernelSignature::ToString() const { std::stringstream ss; if (is_varargs_) { - ss << "varargs[" << in_types_[0].ToString() << "]"; + ss << "varargs["; } else { ss << "("; - for (size_t i = 0; i < in_types_.size(); ++i) { - if (i > 0) { - ss << ", "; - } - ss << in_types_[i].ToString(); + } + for (size_t i = 0; i < in_types_.size(); ++i) { + if (i > 0) { + ss << ", "; } + ss << in_types_[i].ToString(); + } + if (is_varargs_) { + ss << "]"; + } else { ss << ")"; } ss << " -> " << out_type_.ToString(); diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index b99b41170d2..099bd95bbf2 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -41,7 +41,7 @@ namespace arrow { namespace compute { -struct FunctionOptions; +class FunctionOptions; /// \brief Base class for opaque kernel-specific state. For example, if there /// is some kind of initialization required. @@ -63,22 +63,6 @@ class ARROW_EXPORT KernelContext { /// byte is preemptively zeroed to help avoid ASAN or valgrind issues. Result> AllocateBitmap(int64_t num_bits); - /// \brief Indicate that an error has occurred, to be checked by a exec caller - /// \param[in] status a Status instance. - /// - /// \note Will not overwrite a prior set Status, so we will have the first - /// error that occurred until ExecContext::ResetStatus is called. - void SetStatus(const Status& status); - - /// \brief Clear any error status. - void ResetStatus(); - - /// \brief Return true if an error has occurred. - bool HasError() const { return !status_.ok(); } - - /// \brief Return the current status of the context. - const Status& status() const { return status_; } - /// \brief Assign the active KernelState to be utilized for each stage of /// kernel execution. 
Ownership and memory lifetime of the KernelState must /// be minded separately. @@ -96,21 +80,9 @@ class ARROW_EXPORT KernelContext { private: ExecContext* exec_ctx_; - Status status_; - KernelState* state_; + KernelState* state_ = NULLPTR; }; -// A macro to invoke for error control flow after invoking functions (such as -// kernel init or exec functions) that propagate errors via KernelContext. -#define ARROW_CTX_RETURN_IF_ERROR(CTX) \ - do { \ - if (ARROW_PREDICT_FALSE((CTX)->HasError())) { \ - Status s = (CTX)->status(); \ - (CTX)->ResetStatus(); \ - return s; \ - } \ - } while (0) - /// \brief The standard kernel execution API that must be implemented for /// SCALAR and VECTOR kernel types. This includes both stateless and stateful /// kernels. Kernels depending on some execution state access that state via @@ -119,7 +91,7 @@ class ARROW_EXPORT KernelContext { /// into pre-allocated memory if they are able, though for some kernels /// (e.g. in cases when a builder like StringBuilder) must be employed this may /// not be possible. -using ArrayKernelExec = std::function; +using ArrayKernelExec = std::function; /// \brief An type-checking interface to permit customizable validation rules /// for use with InputType and KernelSignature. This is for scenarios where the @@ -349,6 +321,9 @@ class ARROW_EXPORT OutputType { this->resolver_ = other.resolver_; } + OutputType& operator=(const OutputType&) = default; + OutputType& operator=(OutputType&&) = default; + /// \brief Return the shape and type of the expected output value of the /// kernel given the value descriptors (shapes and types) of the input /// arguments. The resolver may make use of state information kept in the @@ -391,8 +366,10 @@ class ARROW_EXPORT OutputType { /// \brief Holds the input types and output type of the kernel. /// -/// VarArgs functions should pass a single input type to be used to validate -/// the input types of a function invocation. 
+/// VarArgs functions with minimum N arguments should pass up to N input types to be +/// used to validate the input types of a function invocation. The first N-1 types +/// will be matched against the first N-1 arguments, and the last type will be +/// matched against the remaining arguments. class ARROW_EXPORT KernelSignature { public: KernelSignature(std::vector in_types, OutputType out_type, @@ -523,9 +500,8 @@ struct KernelInitArgs { }; /// \brief Common initializer function for all kernel types. -/// If an error occurs it will be stored in the KernelContext; nullptr will be returned. -using KernelInit = - std::function(KernelContext*, const KernelInitArgs&)>; +using KernelInit = std::function>( + KernelContext*, const KernelInitArgs&)>; /// \brief Base type for kernels. Contains the function signature and /// optionally the state initialization function, along with some common @@ -548,6 +524,10 @@ struct Kernel { /// set up any options or state relevant for execution. KernelInit init; + /// \brief Create a vector of new KernelState for invocations of this kernel. + static Status InitAll(KernelContext*, const KernelInitArgs&, + std::vector>*); + /// \brief Indicates whether execution can benefit from parallelization /// (splitting large chunks into smaller chunks and using multiple /// threads). Some kernels may not support parallel execution at @@ -608,7 +588,7 @@ struct ScalarKernel : public ArrayKernel { // VectorKernel (for VectorFunction) /// \brief See VectorKernel::finalize member for usage -using VectorFinalize = std::function*)>; +using VectorFinalize = std::function*)>; /// \brief Kernel data structure for implementations of VectorFunction. 
In /// addition to the members found in ArrayKernel, contains an optional @@ -663,13 +643,13 @@ struct VectorKernel : public ArrayKernel { // ---------------------------------------------------------------------- // ScalarAggregateKernel (for ScalarAggregateFunction) -using ScalarAggregateConsume = std::function; +using ScalarAggregateConsume = std::function; using ScalarAggregateMerge = - std::function; + std::function; // Finalize returns Datum to permit multiple return values -using ScalarAggregateFinalize = std::function; +using ScalarAggregateFinalize = std::function; /// \brief Kernel data structure for implementations of /// ScalarAggregateFunction. The four necessary components of an aggregation @@ -699,6 +679,12 @@ struct ScalarAggregateKernel : public Kernel { KernelSignature::Make(std::move(in_types), std::move(out_type)), std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {} + /// \brief Merge a vector of KernelStates into a single KernelState. + /// The merged state will be returned and will be set on the KernelContext. + static Result> MergeAll( + const ScalarAggregateKernel* kernel, KernelContext* ctx, + std::vector> states); + ScalarAggregateConsume consume; ScalarAggregateMerge merge; ScalarAggregateFinalize finalize; @@ -707,19 +693,22 @@ struct ScalarAggregateKernel : public Kernel { // ---------------------------------------------------------------------- // HashAggregateKernel (for HashAggregateFunction) -using HashAggregateConsume = std::function; +using HashAggregateResize = std::function; + +using HashAggregateConsume = std::function; using HashAggregateMerge = - std::function; + std::function; // Finalize returns Datum to permit multiple return values -using HashAggregateFinalize = std::function; +using HashAggregateFinalize = std::function; /// \brief Kernel data structure for implementations of /// HashAggregateFunction. 
The four necessary components of an aggregation /// kernel are the init, consume, merge, and finalize functions. /// /// * init: creates a new KernelState for a kernel. +/// * resize: ensure that the KernelState can accommodate the specified number of groups. /// * consume: processes an ExecBatch (which includes the argument as well /// as an array of group identifiers) and updates the KernelState found in the /// KernelContext. @@ -730,20 +719,24 @@ struct HashAggregateKernel : public Kernel { HashAggregateKernel() = default; HashAggregateKernel(std::shared_ptr sig, KernelInit init, - HashAggregateConsume consume, HashAggregateMerge merge, - HashAggregateFinalize finalize) + HashAggregateResize resize, HashAggregateConsume consume, + HashAggregateMerge merge, HashAggregateFinalize finalize) : Kernel(std::move(sig), std::move(init)), + resize(std::move(resize)), consume(std::move(consume)), merge(std::move(merge)), finalize(std::move(finalize)) {} HashAggregateKernel(std::vector in_types, OutputType out_type, - KernelInit init, HashAggregateMerge merge, - HashAggregateConsume consume, HashAggregateFinalize finalize) + KernelInit init, HashAggregateConsume consume, + HashAggregateResize resize, HashAggregateMerge merge, + HashAggregateFinalize finalize) : HashAggregateKernel( KernelSignature::Make(std::move(in_types), std::move(out_type)), - std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {} + std::move(init), std::move(resize), std::move(consume), std::move(merge), + std::move(finalize)) {} + HashAggregateResize resize; HashAggregateConsume consume; HashAggregateMerge merge; HashAggregateFinalize finalize; diff --git a/cpp/src/arrow/compute/kernel_test.cc b/cpp/src/arrow/compute/kernel_test.cc index a5ef9d44e18..a63c42d4fde 100644 --- a/cpp/src/arrow/compute/kernel_test.cc +++ b/cpp/src/arrow/compute/kernel_test.cc @@ -468,15 +468,28 @@ TEST(KernelSignature, MatchesInputs) { } TEST(KernelSignature, VarArgsMatchesInputs) { - 
KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true); - - std::vector args = {int8()}; - ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(ValueDescr::Scalar(int8())); - args.push_back(ValueDescr::Array(int8())); - ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(int32()); - ASSERT_FALSE(sig.MatchesInputs(args)); + { + KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true); + + std::vector args = {int8()}; + ASSERT_TRUE(sig.MatchesInputs(args)); + args.push_back(ValueDescr::Scalar(int8())); + args.push_back(ValueDescr::Array(int8())); + ASSERT_TRUE(sig.MatchesInputs(args)); + args.push_back(int32()); + ASSERT_FALSE(sig.MatchesInputs(args)); + } + { + KernelSignature sig({int8(), utf8()}, utf8(), /*is_varargs=*/true); + + std::vector args = {int8()}; + ASSERT_TRUE(sig.MatchesInputs(args)); + args.push_back(ValueDescr::Scalar(utf8())); + args.push_back(ValueDescr::Array(utf8())); + ASSERT_TRUE(sig.MatchesInputs(args)); + args.push_back(int32()); + ASSERT_FALSE(sig.MatchesInputs(args)); + } } TEST(KernelSignature, ToString) { diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 5e223a1f906..474ce1418fd 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -27,14 +27,17 @@ add_arrow_compute_test(scalar_test scalar_nested_test.cc scalar_set_lookup_test.cc scalar_string_test.cc + scalar_temporal_test.cc scalar_validity_test.cc scalar_fill_null_test.cc + scalar_if_else_test.cc test_util.cc) add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute") +add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute") 
add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute") @@ -45,6 +48,7 @@ add_arrow_compute_test(vector_test SOURCES vector_hash_test.cc vector_nested_test.cc + vector_replace_test.cc vector_selection_test.cc vector_sort_test.cc test_util.cc) @@ -52,6 +56,7 @@ add_arrow_compute_test(vector_test add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute") add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute") add_arrow_benchmark(vector_partition_benchmark PREFIX "arrow-compute") +add_arrow_benchmark(vector_replace_benchmark PREFIX "arrow-compute") add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute") # ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 61dc8cb403c..a7df66695b2 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -27,16 +27,16 @@ namespace compute { namespace { -void AggregateConsume(KernelContext* ctx, const ExecBatch& batch) { - checked_cast(ctx->state())->Consume(ctx, batch); +Status AggregateConsume(KernelContext* ctx, const ExecBatch& batch) { + return checked_cast(ctx->state())->Consume(ctx, batch); } -void AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) { - checked_cast(dst)->MergeFrom(ctx, std::move(src)); +Status AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) { + return checked_cast(dst)->MergeFrom(ctx, std::move(src)); } -void AggregateFinalize(KernelContext* ctx, Datum* out) { - checked_cast(ctx->state())->Finalize(ctx, out); +Status AggregateFinalize(KernelContext* ctx, Datum* out) { + return checked_cast(ctx->state())->Finalize(ctx, out); } } // namespace @@ -56,72 +56,91 @@ namespace aggregate { // Count implementation struct CountImpl : public ScalarAggregator { - explicit CountImpl(CountOptions options) : options(std::move(options)) {} - - 
void Consume(KernelContext*, const ExecBatch& batch) override { - const ArrayData& input = *batch[0].array(); - const int64_t nulls = input.GetNullCount(); - this->nulls += nulls; - this->non_nulls += input.length - nulls; + explicit CountImpl(ScalarAggregateOptions options) : options(std::move(options)) {} + + Status Consume(KernelContext*, const ExecBatch& batch) override { + if (batch[0].is_array()) { + const ArrayData& input = *batch[0].array(); + const int64_t nulls = input.GetNullCount(); + this->nulls += nulls; + this->non_nulls += input.length - nulls; + } else { + const Scalar& input = *batch[0].scalar(); + this->nulls += !input.is_valid * batch.length; + this->non_nulls += input.is_valid * batch.length; + } + return Status::OK(); } - void MergeFrom(KernelContext*, KernelState&& src) override { + Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other_state = checked_cast(src); this->non_nulls += other_state.non_nulls; this->nulls += other_state.nulls; + return Status::OK(); } - void Finalize(KernelContext* ctx, Datum* out) override { + Status Finalize(KernelContext* ctx, Datum* out) override { const auto& state = checked_cast(*ctx->state()); - switch (state.options.count_mode) { - case CountOptions::COUNT_NON_NULL: - *out = Datum(state.non_nulls); - break; - case CountOptions::COUNT_NULL: - *out = Datum(state.nulls); - break; - default: - ctx->SetStatus(Status::Invalid("Unknown CountOptions encountered")); - break; + if (state.options.skip_nulls) { + *out = Datum(state.non_nulls); + } else { + *out = Datum(state.nulls); } + return Status::OK(); } - CountOptions options; + ScalarAggregateOptions options; int64_t non_nulls = 0; int64_t nulls = 0; }; -std::unique_ptr CountInit(KernelContext*, const KernelInitArgs& args) { +Result> CountInit(KernelContext*, + const KernelInitArgs& args) { return ::arrow::internal::make_unique( - static_cast(*args.options)); + static_cast(*args.options)); } // 
---------------------------------------------------------------------- // Sum implementation template -struct SumImplDefault : public SumImpl {}; +struct SumImplDefault : public SumImpl { + explicit SumImplDefault(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; template -struct MeanImplDefault : public MeanImpl {}; +struct MeanImplDefault : public MeanImpl { + explicit MeanImplDefault(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; -std::unique_ptr SumInit(KernelContext* ctx, const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> SumInit(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, *args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } -std::unique_ptr MeanInit(KernelContext* ctx, const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> MeanInit(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, *args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } // ---------------------------------------------------------------------- // MinMax implementation -std::unique_ptr MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) { +Result> MinMaxInit(KernelContext* ctx, + const KernelInitArgs& args) { MinMaxInitState visitor( ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), - static_cast(*args.options)); + static_cast(*args.options)); return visitor.Create(); } @@ -129,13 +148,21 @@ std::unique_ptr MinMaxInit(KernelContext* ctx, const KernelInitArgs // Any implementation struct BooleanAnyImpl : public ScalarAggregator { - void Consume(KernelContext*, const ExecBatch& batch) override { + explicit BooleanAnyImpl(ScalarAggregateOptions options) : options(std::move(options)) {} + + Status Consume(KernelContext*, const ExecBatch& batch) override { // short-circuit if seen a True already if (this->any == true) { - 
return; + return Status::OK(); + } + if (batch[0].is_scalar()) { + const auto& scalar = *batch[0].scalar(); + this->has_nulls = !scalar.is_valid; + this->any = scalar.is_valid && checked_cast(scalar).value; + return Status::OK(); } - const auto& data = *batch[0].array(); + this->has_nulls = data.GetNullCount() > 0; arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[0], data.offset, data.buffers[1], data.offset, data.length); int64_t position = 0; @@ -147,34 +174,60 @@ struct BooleanAnyImpl : public ScalarAggregator { } position += block.length; } + return Status::OK(); } - void MergeFrom(KernelContext*, KernelState&& src) override { + Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other = checked_cast(src); this->any |= other.any; + this->has_nulls |= other.has_nulls; + return Status::OK(); } - void Finalize(KernelContext*, Datum* out) override { - out->value = std::make_shared(this->any); + Status Finalize(KernelContext* ctx, Datum* out) override { + if (!options.skip_nulls && !this->any && this->has_nulls) { + out->value = std::make_shared(); + } else { + out->value = std::make_shared(this->any); + } + return Status::OK(); } + bool any = false; + bool has_nulls = false; + ScalarAggregateOptions options; }; -std::unique_ptr AnyInit(KernelContext*, const KernelInitArgs& args) { - return ::arrow::internal::make_unique(); +Result> AnyInit(KernelContext*, const KernelInitArgs& args) { + const ScalarAggregateOptions options = + static_cast(*args.options); + return ::arrow::internal::make_unique( + static_cast(*args.options)); } // ---------------------------------------------------------------------- // All implementation struct BooleanAllImpl : public ScalarAggregator { - void Consume(KernelContext*, const ExecBatch& batch) override { + explicit BooleanAllImpl(ScalarAggregateOptions options) : options(std::move(options)) {} + + Status Consume(KernelContext*, const ExecBatch& batch) override { // short-circuit if seen a 
false already if (this->all == false) { - return; + return Status::OK(); + } + // short-circuit if seen a null already + if (!options.skip_nulls && this->has_nulls) { + return Status::OK(); + } + if (batch[0].is_scalar()) { + const auto& scalar = *batch[0].scalar(); + this->has_nulls = !scalar.is_valid; + this->all = !scalar.is_valid || checked_cast(scalar).value; + return Status::OK(); } - const auto& data = *batch[0].array(); + this->has_nulls = data.GetNullCount() > 0; arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[1], data.offset, data.buffers[0], data.offset, data.length); int64_t position = 0; @@ -186,23 +239,161 @@ struct BooleanAllImpl : public ScalarAggregator { } position += block.length; } + + return Status::OK(); } - void MergeFrom(KernelContext*, KernelState&& src) override { + Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other = checked_cast(src); this->all &= other.all; + this->has_nulls |= other.has_nulls; + return Status::OK(); } - void Finalize(KernelContext*, Datum* out) override { - out->value = std::make_shared(this->all); + Status Finalize(KernelContext*, Datum* out) override { + if (!options.skip_nulls && this->all && this->has_nulls) { + out->value = std::make_shared(); + } else { + out->value = std::make_shared(this->all); + } + return Status::OK(); } + bool all = true; + bool has_nulls = false; + ScalarAggregateOptions options; }; -std::unique_ptr AllInit(KernelContext*, const KernelInitArgs& args) { - return ::arrow::internal::make_unique(); +Result> AllInit(KernelContext*, const KernelInitArgs& args) { + return ::arrow::internal::make_unique( + static_cast(*args.options)); } +// ---------------------------------------------------------------------- +// Index implementation + +template +struct IndexImpl : public ScalarAggregator { + using ArgValue = typename internal::GetViewType::T; + + explicit IndexImpl(IndexOptions options, KernelState* raw_state) + : options(std::move(options)), 
seen(0), index(-1) { + if (auto state = static_cast*>(raw_state)) { + seen = state->seen; + index = state->index; + } + } + + Status Consume(KernelContext* ctx, const ExecBatch& batch) override { + // short-circuit + if (index >= 0 || !options.value->is_valid) { + return Status::OK(); + } + + auto input = batch[0].array(); + seen = input->length; + const ArgValue desired = internal::UnboxScalar::Unbox(*options.value); + int64_t i = 0; + + ARROW_UNUSED(internal::VisitArrayValuesInline( + *input, + [&](ArgValue v) -> Status { + if (v == desired) { + index = i; + return Status::Cancelled("Found"); + } else { + ++i; + return Status::OK(); + } + }, + [&]() -> Status { + ++i; + return Status::OK(); + })); + + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other = checked_cast(src); + if (index < 0 && other.index >= 0) { + index = seen + other.index; + } + seen += other.seen; + return Status::OK(); + } + + Status Finalize(KernelContext*, Datum* out) override { + out->value = std::make_shared(index >= 0 ? 
index : -1); + return Status::OK(); + } + + const IndexOptions options; + int64_t seen = 0; + int64_t index = -1; +}; + +struct IndexInit { + std::unique_ptr state; + KernelContext* ctx; + const IndexOptions& options; + const DataType& type; + + IndexInit(KernelContext* ctx, const IndexOptions& options, const DataType& type) + : ctx(ctx), options(options), type(type) {} + + Status Visit(const DataType& type) { + return Status::NotImplemented("Index kernel not implemented for ", type.ToString()); + } + + Status Visit(const BooleanType&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + template + enable_if_number Visit(const Type&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + template + enable_if_base_binary Visit(const Type&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + template + enable_if_date Visit(const Type&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + template + enable_if_time Visit(const Type&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + template + enable_if_timestamp Visit(const Type&) { + state.reset(new IndexImpl(options, ctx->state())); + return Status::OK(); + } + + Result> Create() { + RETURN_NOT_OK(VisitTypeInline(type, this)); + return std::move(state); + } + + static Result> Init(KernelContext* ctx, + const KernelInitArgs& args) { + IndexInit visitor(ctx, static_cast(*args.options), + *args.inputs[0].type); + return visitor.Create(); + } +}; + void AddBasicAggKernels(KernelInit init, const std::vector>& types, std::shared_ptr out_ty, ScalarAggregateFunction* func, @@ -214,13 +405,33 @@ void AddBasicAggKernels(KernelInit init, } } +void AddScalarAggKernels(KernelInit init, + const std::vector>& types, + std::shared_ptr out_ty, + ScalarAggregateFunction* func) { + for (const auto& ty : types) { + // scalar[InT] -> scalar[OutT] + auto sig = 
KernelSignature::Make({InputType::Scalar(ty)}, ValueDescr::Scalar(out_ty)); + AddAggKernel(std::move(sig), init, func, SimdLevel::NONE); + } +} + +void AddArrayScalarAggKernels(KernelInit init, + const std::vector>& types, + std::shared_ptr out_ty, + ScalarAggregateFunction* func, + SimdLevel::type simd_level = SimdLevel::NONE) { + AddBasicAggKernels(init, types, out_ty, func, simd_level); + AddScalarAggKernels(init, types, out_ty, func); +} + void AddMinMaxKernels(KernelInit init, const std::vector>& types, ScalarAggregateFunction* func, SimdLevel::type simd_level) { for (const auto& ty : types) { - // array[T] -> scalar[struct] + // any[T] -> scalar[struct] auto out_ty = struct_({field("min", ty), field("max", ty)}); - auto sig = KernelSignature::Make({InputType::Array(ty)}, ValueDescr::Scalar(out_ty)); + auto sig = KernelSignature::Make({InputType(ty)}, ValueDescr::Scalar(out_ty)); AddAggKernel(std::move(sig), init, func, simd_level); } } @@ -231,54 +442,85 @@ namespace internal { namespace { const FunctionDoc count_doc{"Count the number of null / non-null values", - ("By default, non-null values are counted.\n" - "This can be changed through CountOptions."), + ("By default, only non-null values are counted.\n" + "This can be changed through ScalarAggregateOptions."), {"array"}, - "CountOptions"}; + "ScalarAggregateOptions"}; const FunctionDoc sum_doc{ - "Sum values of a numeric array", ("Null values are ignored."), {"array"}}; - -const FunctionDoc mean_doc{"Compute the mean of a numeric array", - ("Null values are ignored. The result is always computed\n" - "as a double, regardless of the input types"), - {"array"}}; + "Compute the sum of a numeric array", + ("Null values are ignored by default. 
Minimum count of non-null\n" + "values can be set and null is returned if too few are present.\n" + "This can be changed through ScalarAggregateOptions."), + {"array"}, + "ScalarAggregateOptions"}; + +const FunctionDoc mean_doc{ + "Compute the mean of a numeric array", + ("Null values are ignored by default. Minimum count of non-null\n" + "values can be set and null is returned if too few are " + "present.\nThis can be changed through ScalarAggregateOptions.\n" + "The result is always computed as a double, regardless of the input types."), + {"array"}, + "ScalarAggregateOptions"}; const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numeric array", ("Null values are ignored by default.\n" - "This can be changed through MinMaxOptions."), + "This can be changed through ScalarAggregateOptions."), {"array"}, - "MinMaxOptions"}; + "ScalarAggregateOptions"}; const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true", - ("Null values are ignored."), - {"array"}}; + ("Null values are ignored by default.\n" + "If null values are taken into account by setting " + "ScalarAggregateOptions parameter skip_nulls = false then " + "Kleene logic is used.\n" + "See KleeneOr for more details on Kleene logic."), + {"array"}, + "ScalarAggregateOptions"}; const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate to true", - ("Null values are ignored."), - {"array"}}; + ("Null values are ignored by default.\n" + "If null values are taken into account by setting " + "ScalarAggregateOptions parameter skip_nulls = false then " + "Kleene logic is used.\n" + "See KleeneAnd for more details on Kleene logic."), + {"array"}, + "ScalarAggregateOptions"}; + +const FunctionDoc index_doc{"Find the index of the first occurrence of a given value", + ("The result is always computed as an int64_t, regardless\n" + "of the offset type of the input array."), + {"array"}, + "IndexOptions"}; } // namespace void 
RegisterScalarAggregateBasic(FunctionRegistry* registry) { - static auto default_count_options = CountOptions::Defaults(); + static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults(); + auto func = std::make_shared( - "count", Arity::Unary(), &count_doc, &default_count_options); + "count", Arity::Unary(), &count_doc, &default_scalar_aggregate_options); // Takes any array input, outputs int64 scalar InputType any_array(ValueDescr::ARRAY); AddAggKernel(KernelSignature::Make({any_array}, ValueDescr::Scalar(int64())), aggregate::CountInit, func.get()); + AddAggKernel( + KernelSignature::Make({InputType(ValueDescr::SCALAR)}, ValueDescr::Scalar(int64())), + aggregate::CountInit, func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); - func = std::make_shared("sum", Arity::Unary(), &sum_doc); - aggregate::AddBasicAggKernels(aggregate::SumInit, {boolean()}, int64(), func.get()); - aggregate::AddBasicAggKernels(aggregate::SumInit, SignedIntTypes(), int64(), - func.get()); - aggregate::AddBasicAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(), - func.get()); - aggregate::AddBasicAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(), - func.get()); + func = std::make_shared("sum", Arity::Unary(), &sum_doc, + &default_scalar_aggregate_options); + aggregate::AddArrayScalarAggKernels(aggregate::SumInit, {boolean()}, int64(), + func.get()); + aggregate::AddArrayScalarAggKernels(aggregate::SumInit, SignedIntTypes(), int64(), + func.get()); + aggregate::AddArrayScalarAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(), + func.get()); + aggregate::AddArrayScalarAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(), + func.get()); // Add the SIMD variants for sum #if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX512) auto cpu_info = arrow::internal::CpuInfo::GetInstance(); @@ -295,10 +537,12 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { #endif 
DCHECK_OK(registry->AddFunction(std::move(func))); - func = std::make_shared("mean", Arity::Unary(), &mean_doc); - aggregate::AddBasicAggKernels(aggregate::MeanInit, {boolean()}, float64(), func.get()); - aggregate::AddBasicAggKernels(aggregate::MeanInit, NumericTypes(), float64(), - func.get()); + func = std::make_shared("mean", Arity::Unary(), &mean_doc, + &default_scalar_aggregate_options); + aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, {boolean()}, float64(), + func.get()); + aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, NumericTypes(), float64(), + func.get()); // Add the SIMD variants for mean #if defined(ARROW_HAVE_RUNTIME_AVX2) if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) { @@ -312,9 +556,8 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { #endif DCHECK_OK(registry->AddFunction(std::move(func))); - static auto default_minmax_options = MinMaxOptions::Defaults(); - func = std::make_shared("min_max", Arity::Unary(), - &min_max_doc, &default_minmax_options); + func = std::make_shared( + "min_max", Arity::Unary(), &min_max_doc, &default_scalar_aggregate_options); aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get()); aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get()); // Add the SIMD variants for min max @@ -332,13 +575,27 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::move(func))); // any - func = std::make_shared("any", Arity::Unary(), &any_doc); - aggregate::AddBasicAggKernels(aggregate::AnyInit, {boolean()}, boolean(), func.get()); + func = std::make_shared("any", Arity::Unary(), &any_doc, + &default_scalar_aggregate_options); + aggregate::AddArrayScalarAggKernels(aggregate::AnyInit, {boolean()}, boolean(), + func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); // all - func = std::make_shared("all", Arity::Unary(), &all_doc); - aggregate::AddBasicAggKernels(aggregate::AllInit, 
{boolean()}, boolean(), func.get()); + func = std::make_shared("all", Arity::Unary(), &all_doc, + &default_scalar_aggregate_options); + aggregate::AddArrayScalarAggKernels(aggregate::AllInit, {boolean()}, boolean(), + func.get()); + DCHECK_OK(registry->AddFunction(std::move(func))); + + // index + func = std::make_shared("index", Arity::Unary(), &index_doc); + aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, BaseBinaryTypes(), int64(), + func.get()); + aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, PrimitiveTypes(), int64(), + func.get()); + aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, TemporalTypes(), int64(), + func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc index feeb66a1489..8d3e5a0409d 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc @@ -25,30 +25,43 @@ namespace aggregate { // Sum implementation template -struct SumImplAvx2 : public SumImpl {}; +struct SumImplAvx2 : public SumImpl { + explicit SumImplAvx2(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; template -struct MeanImplAvx2 : public MeanImpl {}; +struct MeanImplAvx2 : public MeanImpl { + explicit MeanImplAvx2(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; -std::unique_ptr SumInitAvx2(KernelContext* ctx, const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> SumInitAvx2(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, *args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } -std::unique_ptr MeanInitAvx2(KernelContext* ctx, - const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> MeanInitAvx2(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, 
*args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } // ---------------------------------------------------------------------- // MinMax implementation -std::unique_ptr MinMaxInitAvx2(KernelContext* ctx, - const KernelInitArgs& args) { +Result> MinMaxInitAvx2(KernelContext* ctx, + const KernelInitArgs& args) { MinMaxInitState visitor( ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), - static_cast(*args.options)); + static_cast(*args.options)); return visitor.Create(); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc index 522564a8469..4f8ad74a086 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc @@ -25,31 +25,43 @@ namespace aggregate { // Sum implementation template -struct SumImplAvx512 : public SumImpl {}; +struct SumImplAvx512 : public SumImpl { + explicit SumImplAvx512(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; template -struct MeanImplAvx512 : public MeanImpl {}; +struct MeanImplAvx512 : public MeanImpl { + explicit MeanImplAvx512(const ScalarAggregateOptions& options_) { + this->options = options_; + } +}; -std::unique_ptr SumInitAvx512(KernelContext* ctx, - const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> SumInitAvx512(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, *args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } -std::unique_ptr MeanInitAvx512(KernelContext* ctx, - const KernelInitArgs& args) { - SumLikeInit visitor(ctx, *args.inputs[0].type); +Result> MeanInitAvx512(KernelContext* ctx, + const KernelInitArgs& args) { + SumLikeInit visitor( + ctx, *args.inputs[0].type, + static_cast(*args.options)); return visitor.Create(); } // ---------------------------------------------------------------------- // MinMax 
implementation -std::unique_ptr MinMaxInitAvx512(KernelContext* ctx, - const KernelInitArgs& args) { +Result> MinMaxInitAvx512(KernelContext* ctx, + const KernelInitArgs& args) { MinMaxInitState visitor( ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), - static_cast(*args.options)); + static_cast(*args.options)); return visitor.Create(); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index 5029c1855c0..3d02b273066 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include "arrow/compute/api_aggregate.h" #include "arrow/compute/kernels/aggregate_internal.h" @@ -58,45 +59,61 @@ struct SumImpl : public ScalarAggregator { using SumType = typename FindAccumulatorType::Type; using OutputType = typename TypeTraits::ScalarType; - void Consume(KernelContext*, const ExecBatch& batch) override { - const auto& data = batch[0].array(); - this->count = data->length - data->GetNullCount(); - if (is_boolean_type::value) { - this->sum = static_cast(BooleanArray(data).true_count()); + Status Consume(KernelContext*, const ExecBatch& batch) override { + if (batch[0].is_array()) { + const auto& data = batch[0].array(); + this->count += data->length - data->GetNullCount(); + if (is_boolean_type::value) { + this->sum += + static_cast(BooleanArray(data).true_count()); + } else { + this->sum += + arrow::compute::detail::SumArray( + *data); + } } else { - this->sum = - arrow::compute::detail::SumArray(*data); + const auto& data = *batch[0].scalar(); + this->count += data.is_valid * batch.length; + if (data.is_valid) { + this->sum += internal::UnboxScalar::Unbox(data) * batch.length; + } } + return Status::OK(); } - void MergeFrom(KernelContext*, KernelState&& src) override { + Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other 
= checked_cast(src); this->count += other.count; this->sum += other.sum; + return Status::OK(); } - void Finalize(KernelContext*, Datum* out) override { - if (this->count == 0) { + Status Finalize(KernelContext*, Datum* out) override { + if (this->count < options.min_count) { out->value = std::make_shared(); } else { out->value = MakeScalar(this->sum); } + return Status::OK(); } size_t count = 0; typename SumType::c_type sum = 0; + ScalarAggregateOptions options; }; template struct MeanImpl : public SumImpl { - void Finalize(KernelContext*, Datum* out) override { - if (this->count == 0) { + Status Finalize(KernelContext*, Datum* out) override { + if (this->count < options.min_count) { out->value = std::make_shared(); } else { const double mean = static_cast(this->sum) / this->count; out->value = std::make_shared(mean); } + return Status::OK(); } + ScalarAggregateOptions options; }; template